Corpus overview plots¶
%load_ext autoreload
%autoreload 2
import os
from git import Repo
import dimcat as dc
import ms3
# Location of the data repository (the '~' is kept unexpanded here).
corpus_path = "~/romantic_piano_corpus"
repo = Repo(corpus_path)

# The notebook's own repository is found by walking up from the working directory.
notebook_repo = Repo('.', search_parent_directories=True)
notebook_repo_path = notebook_repo.git.rev_parse("--show-toplevel")

# Report the folder name and abbreviated (7-character) commit hash of both
# repositories, followed by the versions of the two libraries in use.
notebook_repo_name = os.path.basename(notebook_repo_path)
notebook_repo_sha = notebook_repo.commit().hexsha[:7]
print(f"Notebook repository '{notebook_repo_name}' @ {notebook_repo_sha}")

data_repo_name = os.path.basename(corpus_path)
data_repo_sha = repo.commit().hexsha[:7]
print(f"Data repo '{data_repo_name}' @ {data_repo_sha}")

print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Notebook repository 'dimcat' @ 10cf791
Data repo 'romantic_piano_corpus' @ c3ac88c
dimcat version 0.2.0.post1.dev109+g2332fcf.d20230210
ms3 version 1.2.3
from fractions import Fraction
from IPython.display import HTML
import plotly.express as px
import colorlover
import pandas as pd
# Show up to 100 columns when wide DataFrames are displayed.
pd.set_option("display.max_columns", 100)

# Shared layout values (white backgrounds, margins, font size); where they are
# applied is not visible in this part of the notebook.
STD_LAYOUT = {
    'paper_bgcolor': '#FFFFFF',
    'plot_bgcolor': '#FFFFFF',
    'margin': {'l': 40, 'r': 0, 'b': 0, 't': 40, 'pad': 0},
    'font': {'size': 15},
}

#OUTPUT_DIR = "/home/hentsche/Documents/phd/romantic_piano_corpus_report/figures/"
# `corpus_path` begins with '~'. os.makedirs() does not expand it and would
# create a literal '~' directory below the current working directory, so the
# home directory is resolved explicitly before the folder is created.
OUTPUT_DIR = os.path.join(os.path.expanduser(corpus_path), 'figures')
os.makedirs(OUTPUT_DIR, exist_ok=True)

#HTML(colorlover.to_html(colorlover.scales))
HTML(colorlover.to_html(colorlover.scales['9']['qual']['Paired']))
# Render plotly's qualitative colour swatches for visual comparison ...
qualitative_colors = px.colors.qualitative
fig = qualitative_colors.swatches()
fig.show()
# ... and keep the D3 sequence as the colour scale for the corpora.
corpus_color_scale = qualitative_colors.D3
Overview¶
# Create an empty dimcat Dataset and load what is found under `corpus_path`.
dataset = dc.Dataset()
dataset.load(directory=corpus_path)
# Last expression of the cell: displays the overview of the loaded data.
dataset.data
[default|all]
All corpora
-----------
View: This view is called 'default'. It
- excludes fnames that are not contained in the metadata,
- filters out file extensions requiring conversion (such as .xml), and
- excludes review files and folders.
has active scores measures notes expanded
metadata view detected detected parsed detected parsed detected parsed
corpus
beethoven_piano_sonatas yes default 87 87 87 87 87 64 64
chopin_mazurkas yes default 55 55 55 55 55 55 55
debussy_suite_bergamasque yes default 4 4 4 4 4 4 4
dvorak_silhouettes yes default 12 12 12 12 12 12 12
grieg_lyric_pieces yes default 66 66 66 66 66 66 66
liszt_pelerinage yes default 19 19 19 19 19 19 19
medtner_tales yes default 19 19 19 19 19 19 19
schumann_kinderszenen yes default 13 13 13 13 13 13 13
tchaikovsky_seasons yes default 12 12 12 12 12 12 12
824/2236 files are excluded from this view.
792 files have been excluded based on their subdir.
32 files have been excluded based on their file name.
There are 2 orphans that could not be attributed to any of the respective corpus's fnames.
Metadata¶
# Concatenated metadata of all loaded corpora.
all_metadata = dataset.data.metadata()

# Compare the number of metadata rows with the number of pieces in the dataset.
n_metadata_rows = len(all_metadata)
n_scores = len(dataset.pieces)
print(f"Concatenated 'metadata.tsv' files cover {n_metadata_rows} of the {n_scores} scores.")

# Display one example row per group of the first index level.
all_metadata.groupby(level=0).nth(0)
Concatenated 'metadata.tsv' files cover 287 of the 287 scores.
| TimeSig | KeySig | last_mc | last_mn | length_qb | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | volta_mcs | all_notes_qb | n_onsets | n_onset_positions | guitar_chord_count | form_label_count | label_count | annotated_key | harmony_version | annotators | reviewers | composed_start | composed_end | composed_source | composer | workTitle | movementNumber | movementTitle | workNumber | poet | lyricist | arranger | copyright | creationDate | mscVersion | platform | source | translator | title_text | subtitle_text | lyricist_text | composer_text | musescore | ms3_version | subdirectory | rel_path | has_drumset | ambitus | imslp | musicbrainz | viaf | wikidata | originalFormat | staff_1_ambitus | staff_1_instrument | staff_2_ambitus | staff_2_instrument | score_integrity | imslp.1 | key | mode | typesetter | text | score integrity | comments | staff_3_ambitus | staff_3_instrument | staff_4_ambitus | staff_4_instrument | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| beethoven_piano_sonatas | 1: 2/2 | 1: -4 | 154 | 152 | 608.0 | 308.0 | 304.0 | 1216.0 | NaN | 1476.00 | 1679 | 985 | 0 | 0 | 241 | f | 2.3.0 | Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) | AN | 1793 | 1795 | OxfordMusicOnline | Ludwig van Beethoven | Sonata no. 1 | 1 | Allegro | op.2/1 | NaN | NaN | NaN | NaN | 2019-03-05 | 3.02 | Apple Macintosh | NaN | NaN | Sonata no. 1 | 1. Allegro | NaN | Ludwig van Beethoven | 3.6.2 | 1.1.1 | MS3 | MS3/01-1.mscx | False | 32-89 (Ab1-F6) | https://imslp.org/wiki/Klaviersonaten_(Beethov... | https://musicbrainz.org/work/a78520e0-0211-3b5... | https://viaf.org/viaf/179625665 | https://www.wikidata.org/wiki/Q145813 | xml | 51-89 (Eb3-F6) | piano | 32-73 (Ab1-Db5) | piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| chopin_mazurkas | 1: 3/4 | 1: 2 | 65 | 64 | 193.0 | 65.0 | 64.0 | 193.0 | NaN | 711.00 | 810 | 274 | 0 | 0 | 116 | b | 2.3.0 | Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0),... | JH, AN, DK | 1837 | 1837 | OxfordMusicOnline | Frédéric Chopin | Mazurkas | 2 | NaN | Op. 30 | NaN | NaN | NaN | NaN | 2019-02-08 | 3.02 | Apple Macintosh | https://github.com/craigsapp/chopin-mazurkas | NaN | Mazurkas, Op. 30 | Mazurka in b, Op. 30, no. 2 | NaN | Frédéric Chopin | 3.6.2 | 1.1.1 | MS3 | MS3/BI105-2op30-2.mscx | False | 35-90 (B1-F#6) | https://imslp.org/wiki/Mazurkas%2C_Op.30_(Chop... | https://musicbrainz.org/work/13e317ea-5e50-3d5... | NaN | https://www.wikidata.org/wiki/Q6799054 | xml | 59-90 (B3-F#6) | piano | 35-71 (B1-B4) | piano | Cédric Koller | NaN | B minor / F sharp | minor | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| debussy_suite_bergamasque | 1: 4/4 | 1: -1 | 89 | 89 | 356.0 | 89.0 | 89.0 | 356.0 | NaN | 1533.67 | 1721 | 870 | 0 | 0 | 274 | F | 2.3.0 | Adrian Nagel (2.1.1), Amelia Brey (2.3.0) | AB, AN | 1890 | 1905 | Oxford Music Online | Claude Debussy | Suite Bergamasque | 1 | Prelude | L.75 | NaN | NaN | NaN | NaN | 2015-05-19 | 3.02 | Microsoft Windows | http://musescore.com/score/890041 | NaN | Suite Bergamasque | 1. Prelude | NaN | Claude Debussy | 3.6.2 | 1.1.1 | MS3 | MS3/l075-01_suite_prelude.mscx | False | 24-94 (C1-Bb6) | https://imslp.org/wiki/Suite_bergamasque_(Debu... | https://musicbrainz.org/work/fe4cfa64-156a-3d7... | https://viaf.org/viaf/177398380 | https://www.wikidata.org/wiki/Q29117932 | NaN | 48-94 (C3-Bb6) | Piano | 24-90 (C1-F#6) | Piano | NaN | NaN | NaN | NaN | NaN | <b>Prélude</b> | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| dvorak_silhouettes | 1: 6/8 | 1: 4, 7: -5, 49: 4 | 54 | 52 | 156.5 | 54.0 | 52.0 | 156.5 | NaN | 658.75 | 957 | 288 | 0 | 0 | 80 | c# | 2.3.0 | Daniel Grote (2.1.1), Hanné Becker (2.3.0) | Johannes Hentschel (2.1.1), AN | 1875 | 1879 | OxfordMusicOnline | Antonín Dvořák | Silhouettes | 1 | Allegro feroce | op. 8 | NaN | NaN | NaN | NaN | 2018-05-26 | 3.02 | Microsoft Windows | NaN | NaN | Silhouettes, op. 8 | 1. Allegro feroce | NaN | Antonín Dvořák | 3.6.2 | 1.1.1 | MS3 | MS3/op08n01.mscx | False | 32-92 (G#1-Ab6) | https://imslp.org/wiki/Silhouettes%2C_Op.8_(Dv... | https://musicbrainz.org/work/80bb714e-a36a-425... | https://viaf.org/viaf/174794325/ | NaN | xml | 56-92 (G#3-Ab6) | Piano | 32-68 (G#1-G#4) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | https://imslp.org/wiki/Special:ReverseLookup/5... | Tom Schreyer | NaN | NaN | NaN | NaN | NaN | NaN |
| grieg_lyric_pieces | 1: 2/4 | 1: -3 | 23 | 23 | 46.0 | 23.0 | 23.0 | 46.0 | NaN | 135.50 | 268 | 156 | 0 | 0 | 43 | Eb | 2.3.0 | Adrian Nagel (2.1.1), John Heilig (2.30) | Adrian Nagel | 1864 | 1867 | OxfordMusicOnline | Edvard Grieg | Lyric Pieces | 1 | Arietta | Op. 12 | NaN | NaN | NaN | NaN | 2018-10-11 | 3.02 | Microsoft Windows | NaN | NaN | Lyric Pieces, Op. 12 | 1. Arietta | NaN | Edvard Grieg | 3.6.2 | 1.1.1 | MS3 | MS3/op12n01.mscx | False | 39-79 (Eb2-G5) | https://imslp.org/wiki/Lyric_Pieces,_Op.12_(Gr... | https://musicbrainz.org/work/b6115546-141a-336... | NaN | https://www.wikidata.org/wiki/Q2304758 | mxl | 55-79 (G3-G5) | NaN | 39-71 (Eb2-Cb5) | NaN | Tom Schreyer | NaN | NaN | NaN | NaN | NaN | https://imslp.eu/files/imglnks/euimg/8/8e/IMSL... | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| liszt_pelerinage | 1: 4/4 | 1: 0 | 97 | 97 | 388.0 | 97.0 | 97.0 | 388.0 | NaN | 1902.42 | 2879 | 1069 | 0 | 0 | 174 | C | 2.3.0 | Adrian Nagel (2.1.1), Amelia Brey (2.3.0) | Johannes Hentschel (1-33 & 82-97), AB, AN | 1848 | 1855 | OxfordMusicOnline | Franz Liszt | Années de Pèlerinage, Première année: Suisse | 1 | Chapelle de Guillaume Tell | S.160 | NaN | NaN | NaN | NaN | 2019-01-26 | 3.02 | Microsoft Windows | https://musescore.com/score/3987861 | NaN | Années de Pèlerinage, Première année: Suisse, ... | <font size="18"/>1. CHAPELLE DE GUILLAUME TELL... | NaN | Franz Liszt | 3.6.2 | 1.1.1 | MS3 | MS3/160.01_Chapelle_de_Guillaume_Tell.mscx | False | 24-96 (C1-C7) | https://imslp.org/wiki/Ann%C3%A9es_de_p%C3%A8l... | https://musicbrainz.org/work/5804701d-54a6-4c9... | https://viaf.org/viaf/179020308/ | https://www.wikidata.org/wiki/Q567462 | xml | 40-96 (E2-C7) | Piano | 24-79 (C1-G5) | Piano | Tom Schreyer | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | https://imslp.org/wiki/Special:ReverseLookup/1... | NaN | NaN |
| medtner_tales | 1: 4/8 | 1: -3 | 81 | 81 | 162.0 | 81.0 | 81.0 | 162.0 | NaN | 603.00 | 1481 | 528 | 0 | 0 | 213 | c | 2.2.0 | Wendelin Bitzan | Adrian Nagel | 1904 | 1905 | OxfordMusicOnline | Nikolai Medtner | Tales | 1 | Andantino | op.8 | NaN | NaN | NaN | NaN | 2017-10-21 | 3.02 | Apple Macintosh | NaN | NaN | Tales, op.8 | 1. Andantino | NaN | Nikolai Medtner | 3.6.2 | 1.1.1 | MS3 | MS3/op08n01.mscx | False | 22-87 (Bb0-Eb6) | https://imslp.org/wiki/2_Tales%2C_Op.8_(Medtne... | https://musicbrainz.org/work/0cdc7080-06b8-33d... | https://viaf.org/viaf/2467165271623010690003 | NaN | NaN | 47-87 (B2-Eb6) | Piano | 22-77 (Bb0-F5) | Piano | Tom Schreyer | NaN | NaN | NaN | NaN | NaN | https://imslp.org/wiki/Special:ReverseLookup/5790 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| schumann_kinderszenen | 1: 2/4 | 1: 1 | 22 | 22 | 44.0 | 44.0 | 44.0 | 88.0 | NaN | 134.33 | 241 | 141 | 0 | 0 | 44 | G | 2.3.0 | Tal Soker (2.1.1), John Heilig (2.3.0) | AN, JHei, JH | 1838 | 1839 | OxfordMusicOnline | Robert Schumann | Kinderszenen | 1 | Von fremden Ländern und Menschen | Op.15 | NaN | NaN | NaN | NaN | 2017-03-11 | 3.02 | Microsoft Windows | http://musescore.com/user/22249306/scores/4778176 | NaN | Von fremden Ländern und Menschen\n(Of Foreign ... | NaN | NaN | Robert Schumann | 3.6.2 | 1.1.1 | MS3 | MS3/n01.mscx | False | 42-79 (F#2-G5) | https://imslp.org/wiki/Kinderszenen,_Op.15_(Sc... | https://musicbrainz.org/work/04bf8808-7a43-30e... | https://viaf.org/viaf/174865068/ | https://www.wikidata.org/wiki/Q1569982 | NaN | 62-79 (D4-G5) | Piano | 42-69 (F#2-A4) | Piano | Tom Schreyer | NaN | NaN | NaN | NaN | NaN | https://imslp.org/wiki/Special:ReverseLookup/6... | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| tchaikovsky_seasons | 1: 3/4 | 1: 3, 29: 1, 63: 3 | 103 | 103 | 309.0 | 103.0 | 103.0 | 309.0 | NaN | 1058.17 | 1537 | 829 | 0 | 0 | 313 | A | 2.3.0 | Adrian Nagel (2.1.1), John Heilig (2.3.0) | Johannes Hentschel, AN | 1875 | 1876 | OxfordMusicOnline | Pyotr Ilyich Tchaikovsky | The Seasons | 1 | January: At the Fireside | Op. 37a | NaN | NaN | NaN | NaN | 11/29/18 | 3.02 | Linux | http://musescore.com/user/12839876/scores/3444321 | NaN | 1. January: At the Fireside | from: <i>The Seasons</i>, op. 37a | «И мирной неги уголок\nНочь сумраком одела,\nВ... | Pyotr Ilyich Tchaikovsky | 3.6.2 | 1.1.1 | MS3 | MS3/op37a01.mscx | False | 33-88 (A1-E6) | https://imslp.org/wiki/The_Seasons,_Op.37a_(Tc... | https://musicbrainz.org/work/6460a645-9844-304... | https://viaf.org/viaf/183857288 | https://www.wikidata.org/wiki/Q2914902 | mxl | 53-88 (E#3-E6) | Piano | 33-88 (A1-E6) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | https://imslp.org/wiki/Special:ReverseLookup/1... | Tom Schreyer | NaN | NaN | NaN | NaN | NaN | NaN |
# How often does each distinct 'annotators' entry occur in the metadata?
print("VALUE COUNTS OF THE COLUMN 'annotators'")
all_metadata['annotators'].value_counts()
VALUE COUNTS OF THE COLUMN 'annotators'
Adrian Nagel (2.1.1), John Heilig (2.3.0) 75
Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0), Davor Krkljus (2.3.0) 49
Wendelin Bitzan 20
Adrian Nagel (2.3.0) 16
Adrian Nagel 14
Tal Soker (2.1.1), John Heilig (2.3.0) 13
Lydia Carlisi (2.2.0), Adrian Nagel (2.3.0) 11
Adrian Nagel (2.1.1), Hanné Becker (2.3.0) 9
Adrian Nagel (2.1.1), Amelia Brey (2.3.0) 9
Adrian Nagel (2.2.0), Amelia Brey (2.3.0) 6
Daniel Grote (2.2.0), Adrian Nagel (2.3.0) 6
Adrian Nagel (2.2.0), Hanné Becker (2.3.0) 5
Lydia Carlisi (2.2.0), Victor Zheng (2.3.0) 4
Wendelin Bitzan (1.0.0), Adrian Nagel (2.3.0) 4
Lydia Carlisi (2.2.0), John Heilig (2.3.0) 3
Daniel Grote (2.1.1), Hanné Becker (2.3.0) 3
Lydia Carlisi (2.2.0), Amelia Brey (2.3.0) 2
Daniel Grote (2.2.0), Hanné Becker (2.3.0) 2
Adrian Nagel (2.2.0), John Heilig (2.3.0) 2
Adrian Nagel (2.1.0), John Heilig (2.3.0) 2
Adrian Nagel (2.2.0), Victor Zheng (2.3.0) 2
Adrian Nagel (2.1.1), John Heilig (2.30) 1
Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) 1
Wendelin Bitzan (2.1.1), AJW (2.3.0), Davor Krkljus (2.3.0) 1
Lars & Ya-Chuan 1
Daniel Grote (2.2.0), John Heilig (2.3.0) 1
Gabriele Ortiz Würth (2.2.0), Adrian Nagel (2.3.0) 1
Adrian Nagel, Victor Zheng (2.3.0) 1
Name: annotators, dtype: int64
# Earliest 'composed_start' and latest 'composed_end', each reported together
# with the index entry of the row holding that value.
earliest_year = all_metadata.composed_start.min()
earliest_piece = all_metadata.composed_start.idxmin()
latest_year = all_metadata.composed_end.max()
latest_piece = all_metadata.composed_end.idxmax()
print(f"Composition dates range from {earliest_year} {earliest_piece} to {latest_year} {latest_piece}.")
Composition dates range from 1793 ('beethoven_piano_sonatas', '01-1') to 1925 ('medtner_tales', 'op48n01').
# Run the dataset through dimcat's IsAnnotatedFilter and report how the
# number of IDs changes.
annotation_filter = dc.IsAnnotatedFilter()
annotated = annotation_filter.process_data(dataset)
n_ids_before = dataset.n_indices
n_ids_after = annotated.n_indices
print(f"Before: {n_ids_before} IDs, after filtering: {n_ids_after}")
Before: 287 IDs, after filtering: 264
Choose here whether you want to see statistics for all scores or only for the annotated ones.
# Toggle: activate the first line (and comment out the second) to base the
# cells that read `selected` on all scores instead of the annotated subset.
#selected = dataset
selected = annotated
Measures¶
# Measures of the selected pieces, concatenated into one table.
all_measures = selected.get_facet('measures')

# One row per measure; the first two index levels identify a file.
n_measures = len(all_measures.index)
n_measure_files = len(all_measures.groupby(level=[0, 1]))
print(f"{n_measures} measures over {n_measure_files} files.")

all_measures.head()
29956 measures over 264 files.
| mc | mn | quarterbeats | duration_qb | keysig | timesig | act_dur | mc_offset | numbering_offset | dont_count | barline | breaks | repeats | next | markers | jump_bwd | jump_fwd | play_until | quarterbeats_all_endings | volta | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | interval | ||||||||||||||||||||
| beethoven_piano_sonatas | 01-1 | [0.0, 1.0) | 1 | 0 | 0 | 1.0 | -4 | 2/2 | 1/4 | 3/4 | <NA> | 1 | NaN | <NA> | firstMeasure | (2,) | NaN | NaN | NaN | NaN | NaN | <NA> |
| [1.0, 5.0) | 2 | 1 | 1 | 4.0 | -4 | 2/2 | 1 | 0 | <NA> | <NA> | NaN | <NA> | NaN | (3,) | NaN | NaN | NaN | NaN | NaN | <NA> | ||
| [5.0, 9.0) | 3 | 2 | 5 | 4.0 | -4 | 2/2 | 1 | 0 | <NA> | <NA> | NaN | <NA> | NaN | (4,) | NaN | NaN | NaN | NaN | NaN | <NA> | ||
| [9.0, 13.0) | 4 | 3 | 9 | 4.0 | -4 | 2/2 | 1 | 0 | <NA> | <NA> | NaN | <NA> | NaN | (5,) | NaN | NaN | NaN | NaN | NaN | <NA> | ||
| [13.0, 17.0) | 5 | 4 | 13 | 4.0 | -4 | 2/2 | 1 | 0 | <NA> | <NA> | NaN | <NA> | NaN | (6,) | NaN | NaN | NaN | NaN | NaN | <NA> |
# Count measures per time signature; missing values get their own entry.
print("Distribution of time signatures per XML measure (MC):")
all_measures['timesig'].value_counts(dropna=False)
Distribution of time signatures per XML measure (MC):
3/4 10130
2/4 6073
4/4 5186
6/8 3432
2/2 2523
3/8 1278
12/8 351
9/8 304
6/4 283
9/16 148
4/8 81
2/8 75
12/16 43
12/32 17
6/16 17
17/4 3
33/32 2
15/4 1
14/4 1
10/4 1
10/8 1
12/4 1
7/4 1
5/4 1
3/2 1
11/4 1
18/4 1
Name: timesig, dtype: int64
Notes¶
# Notes of the selected pieces, concatenated into one table.
all_notes = selected.get_facet('notes')

# One row per note; the first two index levels identify a file.
n_notes = len(all_notes.index)
n_note_files = len(all_notes.groupby(level=[0, 1]))
print(f"{n_notes} notes over {n_note_files} files.")

all_notes.head()
434219 notes over 264 files.
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | duration | gracenote | nominal_duration | scalar | tied | tpc | midi | name | octave | chord_id | volta | tremolo | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | interval | |||||||||||||||||||||
| beethoven_piano_sonatas | 01-1 | [0.0, 1.0) | 1 | 0 | 0 | 1.0 | 0 | 3/4 | 2/2 | 1 | 1 | 1/4 | NaN | 1/4 | 1 | <NA> | 0 | 60 | C4 | 4 | 0 | <NA> | NaN |
| [1.0, 2.0) | 2 | 1 | 1 | 1.0 | 0 | 0 | 2/2 | 1 | 1 | 1/4 | NaN | 1/4 | 1 | <NA> | -1 | 65 | F4 | 4 | 1 | <NA> | NaN | ||
| [2.0, 3.0) | 2 | 1 | 2 | 1.0 | 1/4 | 1/4 | 2/2 | 1 | 1 | 1/4 | NaN | 1/4 | 1 | <NA> | -4 | 68 | Ab4 | 4 | 2 | <NA> | NaN | ||
| [3.0, 4.0) | 2 | 1 | 3 | 1.0 | 1/2 | 1/2 | 2/2 | 1 | 1 | 1/4 | NaN | 1/4 | 1 | <NA> | 0 | 72 | C5 | 5 | 3 | <NA> | NaN | ||
| [4.0, 5.0) | 2 | 1 | 4 | 1.0 | 3/4 | 3/4 | 2/2 | 1 | 1 | 1/4 | NaN | 1/4 | 1 | <NA> | -1 | 77 | F5 | 5 | 4 | <NA> | NaN |
Notes and staves¶
# Count the notes on each staff across all selected pieces.
print("Distribution of notes over staves:")
all_notes['staff'].value_counts()
Distribution of notes over staves:
1 230221
2 200617
3 2397
4 984
Name: staff, dtype: Int64
# For every piece that has at least one note on a staff above 2, print the
# piece's index tuple followed by its note count per staff.
# (Indentation restored: without it the loop and `if` bodies are a SyntaxError.)
print("Distribution of notes over staves for all pieces with more than two staves\n")
for group, df in all_notes.groupby(level=[0,1]):
    if (df.staff > 2).any():
        print(group)
        print(df.staff.value_counts().to_dict())
Distribution of notes over staves for all pieces with more than two staves
('grieg_lyric_pieces', 'op43n06')
{2: 769, 3: 422, 1: 180}
('liszt_pelerinage', '161.04_Sonetto_47_del_Petrarca')
{1: 1076, 2: 628, 3: 42, 4: 29}
('liszt_pelerinage', '161.07_Apres_une_lecture_du_Dante')
{1: 6638, 2: 5181, 3: 50}
('liszt_pelerinage', '162.01_Gondoliera')
{3: 1745, 4: 955}
('medtner_tales', 'op34n03')
{1: 1219, 2: 816, 3: 89}
('medtner_tales', 'op35n04')
{1: 1678, 2: 1632, 3: 49}
# Restricted to notes on staves above 2: count the notes per staff within each
# group formed by the first two index levels (corpus, fname).
all_notes[all_notes.staff > 2].groupby(level=[0,1]).staff.value_counts()
corpus fname staff
grieg_lyric_pieces op43n06 3 422
liszt_pelerinage 161.04_Sonetto_47_del_Petrarca 3 42
4 29
161.07_Apres_une_lecture_du_Dante 3 50
162.01_Gondoliera 3 1745
4 955
medtner_tales op34n03 3 89
op35n04 3 49
Name: staff, dtype: int64
Harmony labels¶
All symbols, independent of the local key (the mode of which changes their semantics).
# The 'expanded' facet is always requested from `annotated`, not from
# `selected`, so this cell is unaffected by the selection toggle.
all_annotations = annotated.get_facet('expanded')
all_annotations.head()
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | interval | ||||||||||||||||||||||||||||||||
| beethoven_piano_sonatas | 01-1 | [0.0, 9.0) | 1 | 0 | 0 | 9.0 | 0 | 3/4 | 2/2 | 2 | 1 | f.i{ | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (0, -3, 1) | () | 0 | 0 | <NA> | NaN |
| [9.0, 17.0) | 4 | 3 | 9 | 8.0 | 0 | 0 | 2/2 | 2 | 1 | V65 | NaN | f | i | NaN | V65 | NaN | V | NaN | 65 | NaN | NaN | NaN | NaN | Mm7 | True | True | (5, 2, -1, 1) | () | 1 | 5 | <NA> | NaN | ||
| [17.0, 21.0) | 6 | 5 | 17 | 4.0 | 0 | 0 | 2/2 | 2 | 1 | i | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | NaN | m | True | True | (0, -3, 1) | () | 0 | 0 | <NA> | NaN | ||
| [21.0, 25.0) | 7 | 6 | 21 | 4.0 | 0 | 0 | 2/2 | 2 | 1 | #viio6 | NaN | f | i | NaN | #viio6 | NaN | #vii | o | 6 | NaN | NaN | NaN | NaN | o | True | True | (2, -1, 5) | () | 5 | 2 | <NA> | NaN | ||
| [25.0, 27.0) | 8 | 7 | 25 | 2.0 | 0 | 0 | 2/2 | 2 | 1 | i6 | NaN | f | i | NaN | i6 | NaN | i | NaN | 6 | NaN | NaN | NaN | NaN | m | True | True | (-3, 1, 0) | () | 0 | -3 | <NA> | NaN |
# Rows without a value in the 'root' column are treated as non-chord labels.
no_chord = all_annotations['root'].isna()

n_annotation_rows = len(all_annotations)
n_non_chords = no_chord.sum()
print(f"Concatenated annotation tables contains {n_annotation_rows} rows. {n_non_chords} of them are not chords. Their values are:")

# Frequency of each label among the non-chord rows, as a plain dict.
all_annotations.loc[no_chord, 'label'].value_counts(dropna=False).to_dict()
Concatenated annotation tables contains 57436 rows. 681 of them are not chords. Their values are:
{'{': 640, '}': 32, '|PAC}': 6, '|HC': 2, '|PAC': 1}
# Keep only the rows that are chords, as an independent copy.
all_chords = all_annotations.loc[~no_chord].copy()

# Tokens = rows, types = distinct 'chord' values, documents = distinct
# combinations of the first two index levels.
n_tokens = len(all_chords)
n_types = len(all_chords['chord'].unique())
n_documents = len(all_chords.groupby(level=[0, 1]))
print(f"Corpus contains {n_tokens} tokens and {n_types} types over {n_documents} documents.")
Corpus contains 56755 tokens and 3125 types over 264 documents.
#from ms3 import write_tsv
#write_tsv(all_annotations[all_annotations.pedalend.notna()], './issues/pedalpoints.tsv', pre_process=False)
Corpus summary¶
# Build the per-piece summary table: metadata (restricted to pieces with at
# least one label when the annotated selection is active), a recomputed
# `length_qb`, and a `notes` count per piece.
# Always work on a copy so that overwriting `length_qb` below never mutates
# `all_metadata` itself.
if selected == annotated:
    summary = all_metadata[all_metadata.label_count > 0].copy()
else:
    summary = all_metadata.copy()
# Sum of `act_dur` per piece, times 4.0 -- presumably converting whole-note
# fractions to quarter beats (the column is called `length_qb`); confirm.
# Bracket assignment guarantees a column is written (attribute assignment
# would only set an attribute if the column did not exist yet).
summary['length_qb'] = all_measures.groupby(level=[0, 1]).act_dur.sum() * 4.0
# Number of note rows per piece, aligned on the first two index levels.
notes_per_piece = all_notes.groupby(level=[0, 1]).size().rename('notes')
summary = pd.concat([summary, notes_per_piece], axis=1)
# Descriptive statistics per corpus; columns without any statistic are dropped.
summary.groupby(level=0).describe().dropna(axis=1, how='all')
| last_mc | last_mn | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | all_notes_qb | n_onsets | ... | form_label_count | label_count | composed_start | composed_end | poet | arranger | mscVersion | translator | notes | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | ... | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | count | count | mean | std | min | 25% | 50% | 75% | max | count | count | mean | std | min | 25% | 50% | 75% | max | |
| corpus | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| beethoven_piano_sonatas | 64.0 | 184.703125 | 99.997409 | 28.0 | 111.25 | 169.0 | 253.50 | 543.0 | 64.0 | 182.218750 | 99.937674 | 28.0 | 108.75 | 166.5 | 253.25 | 543.0 | 64.0 | 238.546875 | 136.995299 | 28.0 | 129.25 | 210.5 | 309.25 | 672.0 | 64.0 | 236.078125 | 136.544813 | 28.0 | 124.25 | 207.5 | 308.25 | 664.0 | 64.0 | 719.642500 | 440.937916 | 84.0 | 365.500 | 603.50 | 958.125 | 1872.00 | 64.0 | 1559.357031 | 966.510342 | 232.12 | 790.720 | 1365.040 | 2032.4375 | 4265.05 | 64.0 | 2532.312500 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 64.0 | 343.156250 | 171.842048 | 82.0 | 234.50 | 311.5 | 398.75 | 869.0 | 64.0 | 1801.593750 | 8.466027 | 1793.0 | 1795.0 | 1798.5 | 1803.25 | 1821.0 | 64.0 | 1802.343750 | 8.300351 | 1795.0 | 1797.0 | 1798.5 | 1804.25 | 1822.0 | 0.0 | 0.0 | 64.0 | 3.02 | 0.000000e+00 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 64.0 | 2592.937500 | 1507.781181 | 517.0 | 1486.00 | 2113.5 | 3663.00 | 6817.0 |
| chopin_mazurkas | 55.0 | 89.509091 | 47.668171 | 20.0 | 58.50 | 75.0 | 112.00 | 225.0 | 55.0 | 92.527273 | 55.124054 | 20.0 | 56.50 | 72.0 | 115.50 | 247.0 | 53.0 | 102.320755 | 48.882030 | 12.0 | 68.00 | 98.0 | 125.00 | 225.0 | 53.0 | 101.377358 | 48.774294 | 12.0 | 68.00 | 96.0 | 120.00 | 224.0 | 53.0 | 304.533019 | 146.421218 | 35.0 | 204.000 | 289.00 | 360.000 | 673.00 | 55.0 | 923.672545 | 473.585484 | 208.00 | 583.625 | 782.000 | 1206.5000 | 2207.00 | 55.0 | 1017.563636 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 55.0 | 165.945455 | 97.501836 | 38.0 | 104.00 | 139.0 | 213.50 | 481.0 | 55.0 | 1836.472727 | 6.160972 | 1825.0 | 1831.0 | 1837.0 | 1842.00 | 1848.0 | 55.0 | 1836.909091 | 5.876106 | 1826.0 | 1832.0 | 1837.0 | 1842.00 | 1849.0 | 0.0 | 0.0 | 55.0 | 3.02 | 1.344547e-15 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 55.0 | 1040.018182 | 525.964634 | 252.0 | 680.00 | 921.0 | 1353.50 | 2265.0 |
| debussy_suite_bergamasque | 4.0 | 105.250000 | 36.270971 | 72.0 | 84.75 | 96.5 | 117.00 | 156.0 | 4.0 | 105.250000 | 36.270971 | 72.0 | 84.75 | 96.5 | 117.00 | 156.0 | 4.0 | 105.250000 | 36.270971 | 72.0 | 84.75 | 96.5 | 117.00 | 156.0 | 4.0 | 105.250000 | 36.270971 | 72.0 | 84.75 | 96.5 | 117.00 | 156.0 | 4.0 | 404.000000 | 147.837749 | 312.0 | 321.000 | 340.00 | 423.000 | 624.00 | 4.0 | 1522.250000 | 231.693064 | 1266.00 | 1414.500 | 1498.835 | 1606.5850 | 1825.33 | 4.0 | 1943.000000 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 253.250000 | 70.035110 | 150.0 | 243.00 | 279.0 | 289.25 | 305.0 | 4.0 | 1890.000000 | 0.000000 | 1890.0 | 1890.0 | 1890.0 | 1890.00 | 1890.0 | 4.0 | 1905.000000 | 0.000000 | 1905.0 | 1905.0 | 1905.0 | 1905.00 | 1905.0 | 0.0 | 0.0 | 4.0 | 3.02 | 0.000000e+00 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 4.0 | 2052.500000 | 327.479770 | 1680.0 | 1839.75 | 2061.0 | 2273.75 | 2408.0 |
| dvorak_silhouettes | 12.0 | 56.500000 | 18.128330 | 15.0 | 51.50 | 58.5 | 63.75 | 81.0 | 12.0 | 56.166667 | 17.928308 | 15.0 | 50.00 | 58.5 | 63.75 | 80.0 | 12.0 | 56.500000 | 18.128330 | 15.0 | 51.50 | 58.5 | 63.75 | 81.0 | 12.0 | 56.166667 | 17.928308 | 15.0 | 50.00 | 58.5 | 63.75 | 80.0 | 12.0 | 154.375000 | 51.041037 | 63.5 | 120.000 | 150.25 | 177.750 | 241.00 | 12.0 | 582.626667 | 210.029409 | 278.75 | 418.000 | 560.375 | 689.8750 | 1037.00 | 12.0 | 842.083333 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12.0 | 128.250000 | 52.829270 | 67.0 | 94.75 | 108.5 | 146.00 | 238.0 | 12.0 | 1875.000000 | 0.000000 | 1875.0 | 1875.0 | 1875.0 | 1875.00 | 1875.0 | 12.0 | 1879.000000 | 0.000000 | 1879.0 | 1879.0 | 1879.0 | 1879.00 | 1879.0 | 0.0 | 0.0 | 12.0 | 3.02 | 0.000000e+00 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 12.0 | 887.416667 | 294.495704 | 382.0 | 684.25 | 877.0 | 1074.25 | 1440.0 |
| grieg_lyric_pieces | 66.0 | 82.500000 | 47.123161 | 23.0 | 47.25 | 72.0 | 96.75 | 204.0 | 66.0 | 82.030303 | 47.118499 | 23.0 | 47.25 | 72.0 | 95.00 | 204.0 | 66.0 | 91.439394 | 52.872307 | 23.0 | 53.25 | 76.5 | 112.50 | 252.0 | 66.0 | 90.954545 | 52.824071 | 23.0 | 53.25 | 76.5 | 112.50 | 252.0 | 66.0 | 276.261364 | 160.596176 | 46.0 | 174.000 | 223.50 | 336.250 | 748.00 | 66.0 | 844.019697 | 509.227407 | 135.50 | 477.250 | 707.750 | 1136.7500 | 2389.75 | 66.0 | 945.924242 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 66.0 | 124.712121 | 57.222579 | 21.0 | 87.00 | 111.0 | 156.00 | 313.0 | 66.0 | 1887.393939 | 10.829985 | 1864.0 | 1886.0 | 1889.0 | 1896.00 | 1901.0 | 66.0 | 1889.060606 | 10.033603 | 1867.0 | 1886.0 | 1891.0 | 1896.00 | 1901.0 | 0.0 | 0.0 | 66.0 | 3.02 | 4.474922e-16 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 66.0 | 996.575758 | 626.270329 | 273.0 | 548.75 | 780.5 | 1228.25 | 3691.0 |
| liszt_pelerinage | 19.0 | 138.947368 | 113.069533 | 49.0 | 72.50 | 97.0 | 148.00 | 481.0 | 19.0 | 138.157895 | 112.514820 | 48.0 | 72.50 | 97.0 | 146.50 | 479.0 | 19.0 | 138.947368 | 113.069533 | 49.0 | 72.50 | 97.0 | 148.00 | 481.0 | 19.0 | 138.157895 | 112.514820 | 48.0 | 72.50 | 97.0 | 146.50 | 479.0 | 19.0 | 511.013158 | 363.761729 | 120.0 | 277.000 | 388.00 | 647.500 | 1505.25 | 19.0 | 1763.276842 | 1399.737520 | 289.50 | 867.085 | 1169.750 | 2369.4600 | 5885.89 | 19.0 | 3081.736842 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 19.0 | 266.736842 | 196.676226 | 84.0 | 135.00 | 200.0 | 290.50 | 716.0 | 19.0 | 1846.052632 | 7.419746 | 1838.0 | 1838.0 | 1848.0 | 1848.00 | 1859.0 | 19.0 | 1857.052632 | 2.247806 | 1855.0 | 1855.0 | 1858.0 | 1858.00 | 1861.0 | 0.0 | 0.0 | 19.0 | 3.02 | 4.562583e-16 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 19.0 | 3133.368421 | 2888.257764 | 749.0 | 1486.50 | 2061.0 | 2861.00 | 11869.0 |
| medtner_tales | 19.0 | 130.210526 | 115.216983 | 48.0 | 71.50 | 81.0 | 142.50 | 554.0 | 19.0 | 129.684211 | 115.060879 | 47.0 | 71.00 | 81.0 | 141.50 | 553.0 | 19.0 | 136.368421 | 125.640232 | 48.0 | 71.50 | 81.0 | 163.50 | 599.0 | 19.0 | 135.894737 | 125.446268 | 47.0 | 71.00 | 81.0 | 163.00 | 598.0 | 19.0 | 359.368421 | 235.116924 | 130.0 | 189.750 | 244.50 | 506.500 | 971.50 | 19.0 | 1186.942632 | 869.657119 | 381.63 | 607.460 | 754.770 | 1768.0400 | 3519.00 | 19.0 | 2165.473684 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 19.0 | 354.210526 | 214.154560 | 116.0 | 204.00 | 307.0 | 415.00 | 1020.0 | 19.0 | 1915.263158 | 5.942709 | 1904.0 | 1910.0 | 1916.0 | 1918.50 | 1925.0 | 19.0 | 1916.736842 | 5.857968 | 1905.0 | 1912.0 | 1917.0 | 1920.50 | 1925.0 | 0.0 | 0.0 | 19.0 | 3.02 | 4.562583e-16 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 19.0 | 2259.421053 | 1394.984003 | 826.0 | 1402.00 | 1917.0 | 2860.00 | 6944.0 |
| schumann_kinderszenen | 13.0 | 31.230769 | 12.008010 | 17.0 | 24.00 | 27.0 | 34.00 | 58.0 | 13.0 | 30.153846 | 11.985033 | 16.0 | 24.00 | 25.0 | 32.00 | 57.0 | 13.0 | 40.230769 | 12.975322 | 17.0 | 32.00 | 42.0 | 48.00 | 64.0 | 13.0 | 39.230769 | 13.000986 | 17.0 | 32.00 | 40.0 | 48.00 | 64.0 | 13.0 | 93.730769 | 32.815688 | 34.0 | 64.000 | 96.00 | 120.000 | 144.00 | 13.0 | 278.820000 | 119.650650 | 106.00 | 173.000 | 305.500 | 365.5000 | 471.00 | 13.0 | 385.538462 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 13.0 | 72.923077 | 30.148139 | 44.0 | 49.00 | 67.0 | 84.00 | 140.0 | 13.0 | 1838.000000 | 0.000000 | 1838.0 | 1838.0 | 1838.0 | 1838.00 | 1838.0 | 13.0 | 1839.000000 | 0.000000 | 1839.0 | 1839.0 | 1839.0 | 1839.00 | 1839.0 | 0.0 | 0.0 | 13.0 | 3.02 | 4.622227e-16 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 13.0 | 401.769231 | 124.614843 | 244.0 | 292.00 | 402.0 | 466.00 | 632.0 |
| tchaikovsky_seasons | 12.0 | 104.333333 | 50.102320 | 46.0 | 76.25 | 89.0 | 119.50 | 199.0 | 12.0 | 104.166667 | 49.963320 | 46.0 | 76.25 | 89.0 | 119.50 | 198.0 | 12.0 | 112.583333 | 66.422146 | 46.0 | 76.25 | 89.0 | 122.50 | 263.0 | 12.0 | 112.416667 | 66.339976 | 46.0 | 76.25 | 89.0 | 122.50 | 263.0 | 12.0 | 350.375000 | 183.100336 | 92.0 | 249.875 | 320.50 | 367.500 | 789.00 | 12.0 | 1136.986667 | 466.493146 | 321.75 | 876.625 | 1013.045 | 1332.2075 | 1994.00 | 12.0 | 1514.083333 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 12.0 | 254.916667 | 106.769470 | 119.0 | 189.50 | 235.5 | 286.75 | 514.0 | 12.0 | 1875.000000 | 0.000000 | 1875.0 | 1875.0 | 1875.0 | 1875.00 | 1875.0 | 12.0 | 1876.000000 | 0.000000 | 1876.0 | 1876.0 | 1876.0 | 1876.00 | 1876.0 | 0.0 | 0.0 | 12.0 | 3.02 | 0.000000e+00 | 3.02 | 3.02 | 3.02 | 3.02 | 3.02 | 0.0 | 12.0 | 1562.583333 | 573.170998 | 626.0 | 1113.25 | 1585.5 | 1910.00 | 2523.0 |
9 rows × 123 columns
# Order the corpora chronologically by the mean `composed_end` year of their
# pieces (the mean is truncated to an integer year).
mean_composition_years = summary.groupby(level=0).composed_end.mean().astype(int).sort_values()
chronological_order = mean_composition_years.index.to_list()
# One colour per corpus, in chronological order.
# NOTE: zip() stops at the shorter sequence, so corpora beyond the length of
# `corpus_color_scale` would silently get no colour.
corpus_colors = dict(zip(chronological_order, corpus_color_scale))
pieces_per_corpus = summary.groupby(level='corpus').size().rename('pieces')
bar_data = pd.concat(
    [mean_composition_years.rename('year'), pieces_per_corpus],
    axis=1,
).reset_index()
fig = px.bar(
    bar_data,
    x='year',
    y='pieces',
    color='corpus',
    color_discrete_map=corpus_colors,
    height=350,
    width=800,
)
fig.update_traces(width=5)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "corpus_sizes.png"), scale=2)
# Display the figure explicitly instead of repeating `update_traces` just to
# get the figure echoed as the cell's last expression.
fig.show()
# Display names for the corpora, keyed by the corpus identifiers used in the
# first index level of the data tables.
corpus_names = {
    'beethoven_piano_sonatas': 'Beethoven Sonatas',
    'chopin_mazurkas': 'Chopin Mazurkas',
    'debussy_suite_bergamasque': 'Debussy Suite',
    'dvorak_silhouettes': 'Dvořák Silhouettes',
    'grieg_lyric_pieces': 'Grieg Lyric Pieces',
    'liszt_pelerinage': 'Liszt Années',
    'medtner_tales': 'Medtner Tales',
    'schumann_kinderszenen': 'Schumann Kinderszenen',
    'tchaikovsky_seasons': 'Tchaikovsky Seasons',
}
# Display names in chronological order, and the colour map re-keyed by display name.
chronological_corpus_names = [corpus_names[corpus_id] for corpus_id in chronological_order]
corpus_name_colors = {
    corpus_names[corpus_id]: colour
    for corpus_id, colour in corpus_colors.items()
}
# Attach the display name of each row's corpus (first index level) as a column.
all_annotations['corpus_name'] = (
    all_annotations.index.get_level_values(0).map(corpus_names)
)
all_chords['corpus_name'] = (
    all_chords.index.get_level_values(0).map(corpus_names)
)
# Count pieces per (`composed_end`, corpus) pair and plot them as bars
# coloured by corpus.
bar_data = (
    summary.reset_index()
    .groupby(['composed_end', 'corpus'])
    .size()
    .rename('counts')
    .reset_index()
)
px.bar(
    bar_data,
    x='composed_end',
    y='counts',
    color='corpus',
    color_discrete_map=corpus_colors,
)
# Flat copy of the summary in which corpus identifiers are replaced by their
# display names (used for the histogram legend).
hist_data = summary.reset_index()
hist_data['corpus'] = hist_data['corpus'].map(corpus_names)
hist_data.head()
| corpus | fname | TimeSig | KeySig | last_mc | last_mn | length_qb | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | volta_mcs | all_notes_qb | n_onsets | n_onset_positions | guitar_chord_count | form_label_count | label_count | annotated_key | harmony_version | annotators | reviewers | composed_start | composed_end | composed_source | composer | workTitle | movementNumber | movementTitle | workNumber | poet | lyricist | arranger | copyright | creationDate | mscVersion | platform | source | translator | title_text | subtitle_text | lyricist_text | composer_text | musescore | ms3_version | subdirectory | rel_path | has_drumset | ambitus | imslp | musicbrainz | viaf | wikidata | originalFormat | staff_1_ambitus | staff_1_instrument | staff_2_ambitus | staff_2_instrument | score_integrity | imslp.1 | key | mode | typesetter | text | score integrity | comments | staff_3_ambitus | staff_3_instrument | staff_4_ambitus | staff_4_instrument | notes | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Beethoven Sonatas | 01-1 | 1: 2/2 | 1: -4 | 154 | 152 | 608.0 | 308.0 | 304.0 | 1216.0 | NaN | 1476.00 | 1679 | 985 | 0 | 0 | 241 | f | 2.3.0 | Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) | AN | 1793 | 1795 | OxfordMusicOnline | Ludwig van Beethoven | Sonata no. 1 | 1 | Allegro | op.2/1 | NaN | NaN | NaN | NaN | 2019-03-05 | 3.02 | Apple Macintosh | NaN | NaN | Sonata no. 1 | 1. Allegro | NaN | Ludwig van Beethoven | 3.6.2 | 1.1.1 | MS3 | MS3/01-1.mscx | False | 32-89 (Ab1-F6) | https://imslp.org/wiki/Klaviersonaten_(Beethov... | https://musicbrainz.org/work/a78520e0-0211-3b5... | https://viaf.org/viaf/179625665 | https://www.wikidata.org/wiki/Q145813 | xml | 51-89 (Eb3-F6) | piano | 32-73 (Ab1-Db5) | piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1693 |
| 1 | Beethoven Sonatas | 01-2 | 1: 3/4 | 1: -1 | 62 | 61 | 183.0 | 124.0 | 122.0 | 366.0 | NaN | 526.17 | 1286 | 822 | 0 | 0 | 200 | F | 2.3.0 | Lars & Ya-Chuan | Adrian Nagel, Victor Zheng | 1793 | 1795 | OxfordMusicOnline | Ludwig van Beethoven | Sonata no. 1 | 2 | Adagio | op.2/1 | NaN | NaN | NaN | NaN | 2019-03-05 | 3.02 | Apple Macintosh | NaN | NaN | Sonata no. 1 | 2. Adagio | NaN | Ludwig van Beethoven | 3.6.2 | 1.1.1 | MS3 | MS3/01-2.mscx | False | 31-89 (G1-F6) | https://imslp.org/wiki/Klaviersonaten_(Beethov... | https://musicbrainz.org/work/bea1b893-2732-33a... | https://viaf.org/viaf/179625665 | https://www.wikidata.org/wiki/Q145813 | xml | 43-89 (G2-F6) | piano | 31-77 (G1-F5) | piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1302 |
| 2 | Beethoven Sonatas | 01-3 | 1: 3/4 | 1: -4, 43: -1 | 77 | 73 | 219.0 | 196.0 | 186.0 | 558.0 | NaN | 565.50 | 636 | 310 | 0 | 0 | 132 | f | 2.3.0 | Daniel Grote (2.2.0), Adrian Nagel (2.3.0) | Adrian Nagel, Victor Zheng | 1793 | 1795 | OxfordMusicOnline | Ludwig van Beethoven | Sonata no. 1 | 3 | Menuetto and Trio: Allegretto | op.2/1 | NaN | NaN | NaN | NaN | 2017-11-26 | 3.02 | Microsoft Windows | http://musescore.com/score/55335 | NaN | Sonata no. 1 | 3. Menuetto and Trio: Allegretto | NaN | Ludwig van Beethoven | 3.6.2 | 1.1.1 | MS3 | MS3/01-3.mscx | False | 31-85 (G1-Db6) | https://imslp.org/wiki/Klaviersonaten_(Beethov... | https://musicbrainz.org/work/2bd7e1ea-c696-3be... | https://viaf.org/viaf/179625665 | https://www.wikidata.org/wiki/Q145813 | mxl | 53-85 (F3-Db6) | Piano | 31-74 (G1-D5) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 639 |
| 3 | Beethoven Sonatas | 01-4 | 1: 2/2 | 1: -4 | 199 | 196 | 784.0 | 392.0 | 390.0 | 1560.0 | (((57, 58), (59, 60, 61)),) | 2326.83 | 3151 | 1789 | 0 | 0 | 355 | f | 2.3.0 | Daniel Grote (2.2.0), Adrian Nagel (2.3.0) | Adrian Nagel, Victor Zheng | 1793 | 1795 | OxfordMusicOnline | Ludwig van Beethoven | Sonata no. 1 | 4 | Prestissimo | op.2/1 | NaN | NaN | NaN | NaN | 2017-11-26 | 3.02 | Microsoft Windows | NaN | NaN | Sonata no. 1 | 4. Prestissimo | NaN | Ludwig van Beethoven | 3.6.2 | 1.1.1 | MS3 | MS3/01-4.mscx | False | 31-89 (G1-F6) | https://imslp.org/wiki/Klaviersonaten_(Beethov... | https://musicbrainz.org/work/b755e900-804a-312... | https://viaf.org/viaf/179625665 | https://www.wikidata.org/wiki/Q145813 | mxl | 50-89 (D3-F6) | Piano | 31-75 (G1-Eb5) | Piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3130 |
| 4 | Beethoven Sonatas | 02-1 | 1: 2/4 | 1: 3, 127: 0, 230: 3 | 342 | 336 | 672.0 | 672.0 | 664.0 | 1336.0 | (((115, 116, 117, 118), (119, 120, 121, 122, 1... | 1695.75 | 3034 | 1705 | 1 | 0 | 479 | A | 2.3.0 | Lydia Carlisi (2.2.0), Victor Zheng (2.3.0) | AN, VZ | 1794 | 1795 | OxfordMusicOnline | Ludwig van Beethoven | Sonata no. 2 | 1 | Allegro vivace | op.2/2 | NaN | NaN | NaN | NaN | 2019-03-05 | 3.02 | Apple Macintosh | NaN | NaN | Sonata no. 2 | 1. Allegro vivace | NaN | Ludwig van Beethoven | 3.6.2 | 1.1.1 | MS3 | MS3/02-1.mscx | False | 31-89 (G1-F6) | https://imslp.org/wiki/Klaviersonaten_(Beethov... | https://musicbrainz.org/work/c001a2eb-9493-327... | https://viaf.org/viaf/179221580 | https://www.wikidata.org/wiki/Q145699 | xml | 39-89 (D#2-F6) | piano | 31-76 (G1-E5) | piano | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3093 |
# Histogram of pieces over `composed_end`, stacked by corpus, in bins of
# width 10 (i.e. decades).
fig = px.histogram(
    hist_data,
    x='composed_end',
    color='corpus',
    labels={'composed_end': 'decade', 'count': 'pieces'},
    color_discrete_map=corpus_name_colors,
    width=1000,
    height=400,
)
fig.update_traces(xbins={'size': 10})
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
histogram_path = os.path.join(OUTPUT_DIR, "corpus_size_histogram.png")
fig.write_image(histogram_path, scale=2)
fig.show()
# List all columns available in the per-piece summary table.
summary.columns
Index(['TimeSig', 'KeySig', 'last_mc', 'last_mn', 'length_qb',
'last_mc_unfolded', 'last_mn_unfolded', 'length_qb_unfolded',
'volta_mcs', 'all_notes_qb', 'n_onsets', 'n_onset_positions',
'guitar_chord_count', 'form_label_count', 'label_count',
'annotated_key', 'harmony_version', 'annotators', 'reviewers',
'composed_start', 'composed_end', 'composed_source', 'composer',
'workTitle', 'movementNumber', 'movementTitle', 'workNumber', 'poet',
'lyricist', 'arranger', 'copyright', 'creationDate', 'mscVersion',
'platform', 'source', 'translator', 'title_text', 'subtitle_text',
'lyricist_text', 'composer_text', 'musescore', 'ms3_version',
'subdirectory', 'rel_path', 'has_drumset', 'ambitus', 'imslp',
'musicbrainz', 'viaf', 'wikidata', 'originalFormat', 'staff_1_ambitus',
'staff_1_instrument', 'staff_2_ambitus', 'staff_2_instrument',
'score_integrity', 'imslp.1', 'key', 'mode', 'typesetter', 'text',
'pdf', 'score integrity', 'comments', 'staff_3_ambitus',
'staff_3_instrument', 'PDF', 'staff_4_ambitus', 'staff_4_instrument',
'notes'],
dtype='object')
# Per-corpus overview: absolute totals, averages per piece, and averages per
# measure, combined under a two-level column header.
corpus_metadata = summary.groupby(level=0)
n_pieces = corpus_metadata.size().rename('pieces')
absolute_numbers = {
    'measures': corpus_metadata['last_mn'].sum(),
    'length': corpus_metadata['length_qb'].sum(),
    'notes': corpus_metadata['notes'].sum(),
    'labels': corpus_metadata['label_count'].sum(),
}
absolute = pd.DataFrame.from_dict(absolute_numbers)
relative = absolute.div(n_pieces, axis=0)
# Only the columns from the third one onwards (notes, labels) are related to
# the number of measures.
per_measure = absolute.iloc[:, 2:].div(absolute['measures'], axis=0)
absolute_with_pieces = pd.concat([n_pieces, absolute], axis=1)
complete_summary = pd.concat(
    [absolute_with_pieces, relative, per_measure],
    axis=1,
    keys=['absolute', 'per piece', 'per measure'],
)
complete_summary = complete_summary.apply(pd.to_numeric).round(2)
# Replace corpus identifiers by their display names.
complete_summary.index = complete_summary.index.map(corpus_names)
complete_summary.to_csv('romantic_summary.tsv', sep='\t')
complete_summary
| absolute | per piece | per measure | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
| pieces | measures | length | notes | labels | measures | length | notes | labels | notes | labels | |
| corpus | |||||||||||
| Beethoven Sonatas | 64 | 11662 | 35663.38 | 165948 | 21962 | 182.22 | 557.24 | 2592.94 | 343.16 | 14.23 | 1.88 |
| Chopin Mazurkas | 55 | 5089 | 14605.25 | 57201 | 9127 | 92.53 | 265.55 | 1040.02 | 165.95 | 11.24 | 1.79 |
| Debussy Suite | 4 | 421 | 1616.00 | 8210 | 1013 | 105.25 | 404.00 | 2052.50 | 253.25 | 19.50 | 2.41 |
| Dvořák Silhouettes | 12 | 674 | 1852.50 | 10649 | 1539 | 56.17 | 154.38 | 887.42 | 128.25 | 15.80 | 2.28 |
| Grieg Lyric Pieces | 66 | 5414 | 16485.25 | 65774 | 8231 | 82.03 | 249.78 | 996.58 | 124.71 | 12.15 | 1.52 |
| Liszt Années | 19 | 2625 | 9709.25 | 59534 | 5068 | 138.16 | 511.01 | 3133.37 | 266.74 | 22.68 | 1.93 |
| Medtner Tales | 19 | 2464 | 6598.00 | 42929 | 6730 | 129.68 | 347.26 | 2259.42 | 354.21 | 17.42 | 2.73 |
| Schumann Kinderszenen | 13 | 392 | 934.00 | 5223 | 948 | 30.15 | 71.85 | 401.77 | 72.92 | 13.32 | 2.42 |
| Tchaikovsky Seasons | 12 | 1250 | 3919.50 | 18751 | 3059 | 104.17 | 326.62 | 1562.58 | 254.92 | 15.00 | 2.45 |
# Append a 'sum' row to the corpus overview.
sum_row = pd.DataFrame(complete_summary.sum(), columns=['sum']).T
# Totals are only shown for the 'absolute' counts; the averaged columns
# ('per piece', 'per measure') are blanked. Columns are selected by their
# top-level key rather than by position, and assigned one by one because
# `df.iloc[:, i] = value` is deprecated as a way to replace whole columns.
for column in sum_row.columns:
    if column[0] != 'absolute':
        sum_row[column] = ''
summary_with_sum = pd.concat([complete_summary, sum_row])
# Concatenating with the float-valued sum row upcasts the count columns;
# cast notes and labels back to integers. Assigning via `df[col] = ...`
# replaces the column (and its dtype), whereas `df.loc[:, cols] = ...` may set
# values in place and keep the float dtype.
for column in [('absolute', 'notes'), ('absolute', 'labels')]:
    summary_with_sum[column] = summary_with_sum[column].astype(int)
summary_with_sum
/tmp/ipykernel_145367/1013929657.py:4: DeprecationWarning:
In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`
| absolute | per piece | per measure | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
| pieces | measures | length | notes | labels | measures | length | notes | labels | notes | labels | |
| Beethoven Sonatas | 64.0 | 11662.0 | 35663.38 | 165948 | 21962 | 182.22 | 557.24 | 2592.94 | 343.16 | 14.23 | 1.88 |
| Chopin Mazurkas | 55.0 | 5089.0 | 14605.25 | 57201 | 9127 | 92.53 | 265.55 | 1040.02 | 165.95 | 11.24 | 1.79 |
| Debussy Suite | 4.0 | 421.0 | 1616.00 | 8210 | 1013 | 105.25 | 404.0 | 2052.5 | 253.25 | 19.5 | 2.41 |
| Dvořák Silhouettes | 12.0 | 674.0 | 1852.50 | 10649 | 1539 | 56.17 | 154.38 | 887.42 | 128.25 | 15.8 | 2.28 |
| Grieg Lyric Pieces | 66.0 | 5414.0 | 16485.25 | 65774 | 8231 | 82.03 | 249.78 | 996.58 | 124.71 | 12.15 | 1.52 |
| Liszt Années | 19.0 | 2625.0 | 9709.25 | 59534 | 5068 | 138.16 | 511.01 | 3133.37 | 266.74 | 22.68 | 1.93 |
| Medtner Tales | 19.0 | 2464.0 | 6598.00 | 42929 | 6730 | 129.68 | 347.26 | 2259.42 | 354.21 | 17.42 | 2.73 |
| Schumann Kinderszenen | 13.0 | 392.0 | 934.00 | 5223 | 948 | 30.15 | 71.85 | 401.77 | 72.92 | 13.32 | 2.42 |
| Tchaikovsky Seasons | 12.0 | 1250.0 | 3919.50 | 18751 | 3059 | 104.17 | 326.62 | 1562.58 | 254.92 | 15.0 | 2.45 |
| sum | 264.0 | 29991.0 | 91383.13 | 434219 | 57677 | ||||||
# Show the pieces for which no ambitus value is available.
summary.loc[summary['ambitus'].isna()]
| TimeSig | KeySig | last_mc | last_mn | length_qb | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | volta_mcs | all_notes_qb | n_onsets | n_onset_positions | guitar_chord_count | form_label_count | label_count | annotated_key | harmony_version | annotators | reviewers | composed_start | composed_end | composed_source | composer | workTitle | movementNumber | movementTitle | workNumber | poet | lyricist | arranger | copyright | creationDate | mscVersion | platform | source | translator | title_text | subtitle_text | lyricist_text | composer_text | musescore | ms3_version | subdirectory | rel_path | has_drumset | ambitus | imslp | musicbrainz | viaf | wikidata | originalFormat | staff_1_ambitus | staff_1_instrument | staff_2_ambitus | staff_2_instrument | score_integrity | imslp.1 | key | mode | typesetter | text | score integrity | comments | staff_3_ambitus | staff_3_instrument | staff_4_ambitus | staff_4_instrument | notes | ||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname |
# Preview: capture the two leading integers ("<digits>-<digits>") of each
# ambitus string as separate columns.
summary['ambitus'].str.extract(r"^(\d+)-(\d+)")
| 0 | 1 | ||
|---|---|---|---|
| corpus | fname | ||
| beethoven_piano_sonatas | 01-1 | 32 | 89 |
| 01-2 | 31 | 89 | |
| 01-3 | 31 | 85 | |
| 01-4 | 31 | 89 | |
| 02-1 | 31 | 89 | |
| ... | ... | ... | ... |
| tchaikovsky_seasons | op37a08 | 35 | 91 |
| op37a09 | 36 | 91 | |
| op37a10 | 37 | 82 | |
| op37a11 | 38 | 92 | |
| op37a12 | 39 | 92 |
264 rows × 2 columns
# Parse the two leading integers of each ambitus string into integer columns
# `low` and `high`, and derive their difference as `range`.
ambitus = summary['ambitus'].str.extract(r"^(\d+)-(\d+)").astype(int)
ambitus = ambitus.rename(columns={0: 'low', 1: 'high'})
ambitus['range'] = ambitus['high'] - ambitus['low']
ambitus.head()
| low | high | range | ||
|---|---|---|---|---|
| corpus | fname | |||
| beethoven_piano_sonatas | 01-1 | 32 | 89 | 57 |
| 01-2 | 31 | 89 | 58 | |
| 01-3 | 31 | 85 | 54 | |
| 01-4 | 31 | 89 | 58 | |
| 02-1 | 31 | 89 | 58 |
# Largest `high` value found in each corpus.
ambitus.groupby(level=0)['high'].max()
corpus
beethoven_piano_sonatas 101
chopin_mazurkas 98
debussy_suite_bergamasque 97
dvorak_silhouettes 97
grieg_lyric_pieces 102
liszt_pelerinage 102
medtner_tales 100
schumann_kinderszenen 91
tchaikovsky_seasons 93
Name: high, dtype: int64
# Smallest `low` value found in each corpus.
ambitus.groupby(level=0)['low'].min()
corpus
beethoven_piano_sonatas 24
chopin_mazurkas 25
debussy_suite_bergamasque 24
dvorak_silhouettes 23
grieg_lyric_pieces 21
liszt_pelerinage 21
medtner_tales 21
schumann_kinderszenen 25
tchaikovsky_seasons 29
Name: low, dtype: int64
# Widest single-piece `range` in each corpus.
ambitus.groupby(level=0)['range'].max()
corpus
beethoven_piano_sonatas 72
chopin_mazurkas 68
debussy_suite_bergamasque 70
dvorak_silhouettes 69
grieg_lyric_pieces 74
liszt_pelerinage 81
medtner_tales 78
schumann_kinderszenen 60
tchaikovsky_seasons 64
Name: range, dtype: int64
# Span between the largest `high` and the smallest `low` over all pieces of
# each corpus.
ambitus_by_corpus = ambitus.groupby(level=0)
ambitus_by_corpus['high'].max() - ambitus_by_corpus['low'].min()
corpus
beethoven_piano_sonatas 77
chopin_mazurkas 73
debussy_suite_bergamasque 73
dvorak_silhouettes 74
grieg_lyric_pieces 81
liszt_pelerinage 81
medtner_tales 79
schumann_kinderszenen 66
tchaikovsky_seasons 64
dtype: int64
Phrases¶
# Slice the selected data with dimcat's PhraseSlicer and fetch the slice info
# table (one row per phrase slice, judging by the `phrase_slice` index level
# in the output -- confirm against the dimcat documentation).
phrase_slicer = dc.PhraseSlicer()
phrase_segmented = phrase_slicer.process_data(selected)
phrases = phrase_segmented.get_slice_info()
n_phrases = len(phrases.index)
print(f"Overall number of phrases is {n_phrases}")
phrases.head(20)
Overall number of phrases is 3596
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice | ||||||||||||||||||||||||||||||||
| beethoven_piano_sonatas | 01-1 | [0.0, 32.0) | 1 | 0 | 0 | 32.0 | 0 | 3/4 | 2/2 | 2 | 1 | f.i{ | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN |
| [32.0, 64.0) | 9 | 8 | 32 | 32.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | v{ | NaN | f | i | NaN | v | NaN | v | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (1, -2, 2) | () | 1 | 1 | NaN | NaN | ||
| [64.0, 72.0) | 17 | 16 | 64 | 8.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | I6{ | NaN | f | III | NaN | I6 | NaN | I | NaN | 6 | NaN | NaN | NaN | { | M | True | False | (4, 1, 0) | () | 0 | 4 | NaN | NaN | ||
| [72.0, 77.0) | 19 | 18 | 72 | 5.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | I6{ | NaN | f | III | NaN | I6 | NaN | I | NaN | 6 | NaN | NaN | NaN | { | M | True | False | (4, 1, 0) | () | 0 | 4 | NaN | NaN | ||
| [77.0, 163.0) | 21 | 20 | 77 | 86.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC}{ | NaN | f | III | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | }{ | M | True | False | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [163.0, 192.0) | 42 | 41 | 163 | 29.0 | 1/2 | 1/2 | 2/2 | 2 | 1 | viio7/V{ | NaN | f | III | NaN | viio7/V | NaN | vii | o | 7 | NaN | V | NaN | { | o7 | True | False | (6, 3, 0, -3) | () | 6 | 6 | NaN | NaN | ||
| [192.0, 217.0) | 50 | 48 | 192 | 25.0 | 0 | 3/4 | 2/2 | 2 | 1 | { | NaN | f | III | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | { | NaN | True | False | () | () | <NA> | <NA> | NaN | NaN | ||
| [217.0, 249.0) | 57 | 55 | 217 | 32.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC}{ | NaN | f | iv | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | }{ | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [249.0, 321.0) | 65 | 63 | 249 | 72.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC}{ | NaN | f | v | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | }{ | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [321.0, 377.0) | 83 | 81 | 321 | 56.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC}{ | NaN | f | i | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | }{ | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [377.0, 401.0) | 97 | 95 | 377 | 24.0 | 0 | 0 | 2/2 | 2 | 1 | bII6(4)]{ | NaN | f | i | V | bII6(4) | NaN | bII | NaN | 6 | 4 | NaN | NaN | { | M | True | True | (1, -4, -5) | () | -5 | 1 | NaN | NaN | ||
| [401.0, 432.0) | 103 | 101 | 401 | 31.0 | 0 | 0 | 2/2 | 2 | 1 | i|IAC}{ | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | IAC | }{ | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN | ||
| [432.0, 473.0) | 110 | 108 | 432 | 41.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | i{ | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN | ||
| [473.0, 559.0) | 121 | 119 | 473 | 86.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC{ | NaN | f | i | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | { | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [559.0, 608.0) | 142 | 140 | 559 | 49.0 | 1/2 | 1/2 | 2/2 | 2 | 1 | Ger6{ | NaN | f | i | NaN | Ger6 | Ger | vii | o | 65 | b3 | V | NaN | { | Ger | True | True | (-4, 0, -3, 6) | () | 6 | -4 | NaN | NaN | ||
| 01-2 | [0.0, 11.25) | 1 | 0 | 0 | 11.25 | 0 | 1/2 | 3/4 | 2 | 1 | F.V{ | NaN | F | I | NaN | V | NaN | V | NaN | NaN | NaN | NaN | NaN | { | M | False | False | (1, 5, 2) | () | 1 | 1 | NaN | NaN | |
| [11.25, 24.0) | 5 | 4 | 45/4 | 12.75 | 5/16 | 5/16 | 3/4 | 2 | 1 | { | NaN | F | I | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | { | NaN | False | False | () | () | <NA> | <NA> | NaN | NaN | ||
| [24.0, 35.25) | 9 | 8 | 24 | 11.25 | 1/2 | 1/2 | 3/4 | 2 | 1 | { | NaN | F | I | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | { | NaN | False | False | () | () | <NA> | <NA> | NaN | NaN | ||
| [35.25, 48.0) | 13 | 12 | 141/4 | 12.75 | 5/16 | 5/16 | 3/4 | 2 | 1 | { | NaN | F | I | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | { | NaN | False | False | () | () | <NA> | <NA> | NaN | NaN | ||
| [48.0, 60.0) | 17 | 16 | 48 | 12.0 | 1/2 | 1/2 | 3/4 | 2 | 1 | vi.V{ | NaN | F | vi | NaN | V | NaN | V | NaN | NaN | NaN | NaN | NaN | { | M | False | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN |
# Fetch the 'expanded' facet of the phrase-sliced data and store it as a
# tab-separated file in the current working directory.
phrase_segments = phrase_segmented.get_facet('expanded')
phrase_segments.to_csv(
    path_or_buf='romantic_phrase_segments.tsv.zip',
    sep='\t',
)
phrase_segments
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | ||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice | |||||||||||||||||||||||||||||||||
| beethoven_piano_sonatas | 01-1 | [0.0, 32.0) | [0.0, 0.0) | 1 | 0 | 0 | 0.0 | 0 | 3/4 | 2/2 | 2 | 1 | f.i{ | NaN | f | i | NaN | <NA> | NaN | <NA> | <NA> | <NA> | NaN | NaN | NaN | { | <NA> | True | True | <NA> | <NA> | <NA> | <NA> | NaN | NaN |
| [0.0, 9.0) | 1 | 0 | 0 | 9.0 | 0 | 3/4 | 2/2 | 2 | 1 | f.i{ | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | <NA> | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN | |||
| [9.0, 17.0) | 4 | 3 | 9 | 8.0 | 0 | 0 | 2/2 | 2 | 1 | V65 | NaN | f | i | NaN | V65 | NaN | V | NaN | 65 | NaN | NaN | NaN | NaN | Mm7 | True | True | (5, 2, -1, 1) | () | 1 | 5 | NaN | NaN | |||
| [17.0, 21.0) | 6 | 5 | 17 | 4.0 | 0 | 0 | 2/2 | 2 | 1 | i | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | NaN | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN | |||
| [21.0, 25.0) | 7 | 6 | 21 | 4.0 | 0 | 0 | 2/2 | 2 | 1 | #viio6 | NaN | f | i | NaN | #viio6 | NaN | #vii | o | 6 | NaN | NaN | NaN | NaN | o | True | True | (2, -1, 5) | () | 5 | 2 | NaN | NaN | |||
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| tchaikovsky_seasons | op37a12 | [493.0, 528.0) | [495.0, 499.0) | 166 | 166 | 495 | 4.0 | 0 | 0 | 3/4 | 2 | 1 | V(64) | NaN | Ab | I | NaN | V(64) | NaN | V | NaN | NaN | 64 | NaN | NaN | NaN | M | False | False | (1, 0, 4) | () | 1 | 1 | NaN | NaN |
| [499.0, 501.0) | 167 | 167 | 499 | 2.0 | 1/4 | 1/4 | 3/4 | 2 | 1 | Ger7 | NaN | Ab | I | NaN | Ger7 | Ger | vii | o | 7 | b3 | V | NaN | NaN | Ger | False | False | (6, -4, 0, -3) | () | 6 | 6 | NaN | NaN | |||
| [501.0, 504.0) | 168 | 168 | 501 | 3.0 | 0 | 0 | 3/4 | 2 | 1 | V(64) | NaN | Ab | I | NaN | V(64) | NaN | V | NaN | NaN | 64 | NaN | NaN | NaN | M | False | False | (1, 0, 4) | () | 1 | 1 | NaN | NaN | |||
| [504.0, 504.0) | 169 | 169 | 504 | 0.0 | 0 | 0 | 3/4 | 2 | 1 | I|IAC} | NaN | Ab | I | NaN | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | IAC | } | <NA> | False | False | <NA> | <NA> | <NA> | <NA> | NaN | NaN | |||
| [504.0, 528.0) | 169 | 169 | 504 | 24.0 | 0 | 0 | 3/4 | 2 | 1 | I|IAC} | NaN | Ab | I | NaN | I | NaN | I | NaN | NaN | NaN | NaN | <NA> | <NA> | M | False | False | (0, 4, 1) | () | 0 | 0 | NaN | NaN |
65082 rows × 32 columns
# Show only the phrases lasting longer than 50 quarter notes.
phrases.loc[phrases['duration_qb'] > 50]
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice | ||||||||||||||||||||||||||||||||
| beethoven_piano_sonatas | 01-1 | [77.0, 163.0) | 21 | 20 | 77 | 86.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC}{ | NaN | f | III | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | }{ | M | True | False | (1, 5, 2) | () | 1 | 1 | NaN | NaN |
| [249.0, 321.0) | 65 | 63 | 249 | 72.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC}{ | NaN | f | v | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | }{ | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [321.0, 377.0) | 83 | 81 | 321 | 56.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC}{ | NaN | f | i | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | }{ | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [473.0, 559.0) | 121 | 119 | 473 | 86.0 | 0 | 0 | 2/2 | 2 | 1 | V[V|HC{ | NaN | f | i | V | V | NaN | V | NaN | NaN | NaN | NaN | HC | { | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| 02-1 | [182.5, 242.0) | 93 | 92 | 365/2 | 59.5 | 0 | 0 | 2/4 | 2 | 1 | I|PAC}{ | NaN | A | V | NaN | I | NaN | I | NaN | NaN | NaN | NaN | PAC | }{ | M | False | False | (0, 4, 1) | () | 0 | 0 | <NA> | NaN | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| medtner_tales | op48n02 | [268.0, 319.0) | 91 | 90 | 268 | 51.0 | 0 | 0 | 3/4 | 2 | 1 | i{ | NaN | g | v | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN |
| [493.0, 559.0) | 166 | 165 | 493 | 66.0 | 0 | 0 | 3/4 | 2 | 1 | i[i}{ | NaN | g | i | i | i | NaN | i | NaN | NaN | NaN | NaN | NaN | }{ | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN | ||
| tchaikovsky_seasons | op37a02 | [284.0, 338.0) | 143 | 143 | 284 | 54.0 | 0 | 0 | 2/4 | 2 | 1 | bVII.V65{ | NaN | D | bVII | NaN | V65 | NaN | V | NaN | 65 | NaN | NaN | NaN | { | Mm7 | False | False | (5, 2, -1, 1) | () | 1 | 5 | NaN | NaN |
| op37a05 | [222.0, 274.5) | 77 | 77 | 222 | 52.5 | 0 | 0 | 9/8 | 2 | 1 | bIII64{ | NaN | G | I | NaN | bIII64 | NaN | bIII | NaN | 64 | NaN | NaN | NaN | { | M | False | False | (-2, -3, 1) | () | -3 | -2 | NaN | NaN | |
| op37a07 | [171.0, 224.0) | 43 | 43 | 171 | 53.0 | 3/4 | 3/4 | 4/4 | 2 | 1 | V7(9){ | NaN | Eb | I | NaN | V7(9) | NaN | V | NaN | 7 | 9 | NaN | NaN | { | Mm7 | False | False | (1, 5, 2, -1) | (3,) | 1 | 1 | NaN | NaN |
291 rows × 32 columns
# Collect the distinct time signatures occurring within each phrase
# (grouping on the first three index levels of the segment table).
phrase2timesigs = phrase_segments.groupby(level=[0, 1, 2])['timesig'].unique()
n_timesignatures_per_phrase = phrase2timesigs.map(len)
# Phrases with exactly one time signature: unpack that single value.
uniform_timesigs = phrase2timesigs[n_timesignatures_per_phrase == 1].map(lambda sigs: sigs[0])
more_than_one = n_timesignatures_per_phrase > 1
print(f"Filtered out the {more_than_one.sum()} phrases incorporating more than one time signature.")
n_timesigs = n_timesignatures_per_phrase.value_counts()
display(n_timesigs.reset_index().rename(columns={'index': '#time signatures', 'timesig': '#phrases'}))

# Express every phrase duration in measures: a time signature such as '3/4'
# becomes Fraction(3, 4) * 4 == 3 quarter notes per measure.
uniform_timesig_phrases = phrases.loc[uniform_timesigs.index]
timesig_in_quarterbeats = uniform_timesigs.map(Fraction) * 4
exact_measure_lengths = uniform_timesig_phrases.duration_qb / timesig_in_quarterbeats
# NOTE: from here on `uniform_timesigs` is the phrase table with an additional
# leading 'duration_measures' column, no longer the Series of time signatures.
uniform_timesigs = pd.concat(
    [exact_measure_lengths.rename('duration_measures'), uniform_timesig_phrases],
    axis=1,
)

fig = px.histogram(
    uniform_timesigs,
    x='duration_measures',
    log_y=True,
    labels={'duration_measures': 'phrase length bin in number of measures'},
    color_discrete_sequence=corpus_color_scale,
    height=400,
    width=1000,
)
fig.update_traces(xbins={'size': 1})  # histogram bins of width 1 (measure)
fig.update_layout(**STD_LAYOUT)
fig.update_xaxes(dtick=4, gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "phrase_lengths.png"), scale=2)
fig.show()
Filtered out the 53 phrases incorporating more than one time signature.
| #time signatures | #phrases | |
|---|---|---|
| 0 | 1 | 3543 |
| 1 | 2 | 52 |
| 2 | 3 | 1 |
# Inspect phrases spanning more than 80 measures.
uniform_timesigs.loc[uniform_timesigs['duration_measures'] > 80]
| duration_measures | mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | phrase_slice |
Keys¶
from ms3 import roman_numeral2fifths, transform, resolve_all_relative_numerals, replace_boolean_mode_by_strings
# Slice the selected data into local key segments and fetch one row per slice.
local_key_slicer = dc.LocalKeySlicer()
keys_segmented = local_key_slicer.process_data(selected)
keys = keys_segmented.get_slice_info()
# The table is archived before the 'localkey_fifths' column is added below.
keys.to_csv(path_or_buf='romantic_keys.tsv.zip', sep='\t')
print(f"Overall number of key segments is {len(keys.index)}")
# Convert each local key's Roman numeral (given the global mode) via
# ms3.roman_numeral2fifths and store the result as an additional column.
keys["localkey_fifths"] = transform(
    keys,
    roman_numeral2fifths,
    ['localkey', 'globalkey_is_minor'],
)
keys.head(20)
Overall number of key segments is 1477
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | localkey_fifths | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | localkey_slice | |||||||||||||||||||||||||||||||||
| beethoven_piano_sonatas | 01-1 | [0.0, 41.0) | 1 | 0 | 0 | 41.0 | 0 | 3/4 | 2/2 | 2 | 1 | f.i{ | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN | 0 |
| [41.0, 209.0) | 12 | 11 | 41 | 168.0 | 0 | 0 | 2/2 | 2 | 1 | III.IVM2 | ii7(2) | f | III | NaN | IVM2 | NaN | IV | M | 2 | NaN | NaN | NaN | NaN | MM7 | True | False | (4, -1, 3, 0) | () | -1 | 4 | NaN | NaN | -3 | ||
| [209.0, 247.0) | 55 | 53 | 209 | 38.0 | 0 | 0 | 2/2 | 2 | 1 | iv.viio65/V | NaN | f | iv | NaN | viio65/V | NaN | vii | o | 65 | NaN | V | NaN | NaN | o7 | True | True | (3, 0, -3, 6) | () | 6 | 3 | NaN | NaN | -1 | ||
| [247.0, 276.0) | 64 | 62 | 247 | 29.0 | 1/2 | 1/2 | 2/2 | 2 | 1 | v.It6 | NaN | f | v | NaN | It6 | It | vii | o | 6 | b3 | V | NaN | NaN | It | True | True | (-4, 0, 6) | () | 6 | -4 | NaN | NaN | 1 | ||
| [276.0, 284.0) | 71 | 69 | 276 | 8.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | iv.iio64 | NaN | f | iv | NaN | iio64 | NaN | ii | o | 64 | NaN | NaN | NaN | NaN | o | True | True | (-4, 2, -1) | () | 2 | -4 | NaN | NaN | -1 | ||
| [284.0, 307.0) | 73 | 71 | 284 | 23.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | III.iio64 | NaN | f | III | NaN | iio64 | NaN | ii | o | 64 | NaN | NaN | NaN | NaN | o | True | False | (-4, 2, -1) | () | 2 | -4 | NaN | NaN | -3 | ||
| [307.0, 608.0) | 79 | 77 | 307 | 301.0 | 1/2 | 1/2 | 2/2 | 2 | 1 | i.V | NaN | f | i | NaN | V | NaN | V | NaN | NaN | NaN | NaN | NaN | NaN | M | True | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | 0 | ||
| 01-2 | [0.0, 48.0) | 1 | 0 | 0 | 48.0 | 0 | 1/2 | 3/4 | 2 | 1 | F.V{ | NaN | F | I | NaN | V | NaN | V | NaN | NaN | NaN | NaN | NaN | { | M | False | False | (1, 5, 2) | () | 1 | 1 | NaN | NaN | 0 | |
| [48.0, 61.0) | 17 | 16 | 48 | 13.0 | 1/2 | 1/2 | 3/4 | 2 | 1 | vi.V{ | NaN | F | vi | NaN | V | NaN | V | NaN | NaN | NaN | NaN | NaN | { | M | False | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN | 3 | ||
| [61.0, 91.5) | 22 | 21 | 61 | 30.5 | 0 | 0 | 3/4 | 2 | 1 | V.V7 | NaN | F | V | NaN | V7 | NaN | V | NaN | 7 | NaN | NaN | NaN | NaN | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | NaN | NaN | 1 | ||
| [91.5, 183.0) | 32 | 31 | 183/2 | 91.5 | 1/8 | 1/8 | 3/4 | 2 | 1 | I.V7{ | NaN | F | I | NaN | V7 | NaN | V | NaN | 7 | NaN | NaN | NaN | { | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | NaN | NaN | 0 | ||
| 01-3 | [0.0, 12.0) | 1 | 0 | 0 | 12.0 | 0 | 1/2 | 3/4 | 2 | 1 | f.i[i{ | NaN | f | i | i | i | NaN | i | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (0, -3, 1) | () | 0 | 0 | NaN | NaN | 0 | |
| [12.0, 49.0) | 5 | 4 | 12 | 37.0 | 1/2 | 1/2 | 3/4 | 2 | 1 | III.I[I{ | NaN | f | III | I | I | NaN | I | NaN | NaN | NaN | NaN | NaN | { | M | True | False | (0, 4, 1) | () | 0 | 0 | NaN | NaN | -3 | ||
| [49.0, 76.0) | 19 | 17 | 49 | 27.0 | 0 | 0 | 3/4 | 2 | 1 | iv.#viio7 | NaN | f | iv | NaN | #viio7 | NaN | #vii | o | 7 | NaN | NaN | NaN | NaN | o7 | True | True | (5, 2, -1, -4) | () | 5 | 5 | NaN | NaN | -1 | ||
| [76.0, 120.0) | 28 | 26 | 76 | 44.0 | 0 | 0 | 3/4 | 2 | 1 | i.#viio | NaN | f | i | NaN | #viio | NaN | #vii | o | NaN | NaN | NaN | NaN | NaN | o | True | True | (5, 2, -1) | () | 5 | 5 | NaN | NaN | 0 | ||
| [120.0, 138.0) | 43 | 40 | 120 | 18.0 | 0 | 1/2 | 3/4 | 2 | 1 | I.I{ | NaN | f | I | NaN | I | NaN | I | NaN | NaN | NaN | NaN | NaN | { | M | True | False | (0, 4, 1) | () | 0 | 0 | NaN | NaN | 0 | ||
| [138.0, 157.0) | 49 | 46 | 138 | 19.0 | 1/2 | 1/2 | 3/4 | 2 | 1 | V.V65 | NaN | f | V | NaN | V65 | NaN | V | NaN | 65 | NaN | NaN | NaN | NaN | Mm7 | True | False | (5, 2, -1, 1) | () | 1 | 5 | NaN | NaN | 1 | ||
| [157.0, 219.0) | 57 | 53 | 157 | 62.0 | 0 | 0 | 3/4 | 2 | 1 | I.V7 | NaN | f | I | V | V7 | NaN | V | NaN | 7 | NaN | NaN | NaN | NaN | Mm7 | True | False | (1, 5, 2, -1) | () | 1 | 1 | NaN | NaN | 0 | ||
| 01-4 | [0.0, 47.0) | 1 | 0 | 0 | 47.0 | 0 | 0 | 2/2 | 2 | 1 | f.i{ | NaN | f | i | NaN | i | NaN | i | NaN | NaN | NaN | NaN | NaN | { | m | True | True | (0, -3, 1) | () | 0 | 0 | <NA> | NaN | 0 | |
| [47.0, 224.0) | 13 | 12 | 47 | 177.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | v.V65/V | NaN | f | v | NaN | V65/V | NaN | V | NaN | 65 | NaN | V | NaN | NaN | Mm7 | True | True | (6, 3, 0, 2) | () | 2 | 6 | <NA> | NaN | 1 |
# Total duration (in quarter notes) covered by all local key segments.
keys['duration_qb'].sum()
91369.25
# Total duration (in quarter notes) covered by all phrases.
phrases['duration_qb'].sum()
91328.25
# Summed duration per (global mode, local key label), longest first.
key_durations = (
    keys
    .groupby(['globalkey_is_minor', 'localkey'])['duration_qb']
    .sum()
    .sort_values(ascending=False)
)
print(f"{len(key_durations)} keys overall including hierarchical such as 'III/v'.")
87 keys overall including hierarchical such as 'III/v'.
# Same aggregation as for the raw labels, but after passing the table through
# ms3.resolve_all_relative_numerals (hierarchical labels such as 'III/v').
keys_resolved = resolve_all_relative_numerals(keys)
key_resolved_durations = (
    keys_resolved
    .groupby(['globalkey_is_minor', 'localkey'])['duration_qb']
    .sum()
    .sort_values(ascending=False)
)
print(f"{len(key_resolved_durations)} keys overall after resolving hierarchical ones.")
key_resolved_durations
62 keys overall after resolving hierarchical ones.
globalkey_is_minor localkey
False I 36743.208333
True i 24815.625
False V 3491.5
True I 3421.666667
III 3333.0
...
False bI 22.0
bvii 12.0
#I 11.0
bv 8.0
bii 4.0
Name: duration_qb, Length: 62, dtype: object
# One pie per global mode, showing each resolved local key's share of the
# total duration.
pie_data = replace_boolean_mode_by_strings(key_resolved_durations.reset_index())
px.pie(
    pie_data,
    names='localkey',
    values='duration_qb',
    facet_col='globalkey_mode',
    height=700,
)
def _stacked_bar_length(durations):
    """Map every row to the summed duration of its first index level."""
    totals_per_fifths = durations.groupby(level=0).sum()
    return durations.index.get_level_values(0).map(totals_per_fifths)


localkey_fifths_durations = keys.groupby(['localkey_fifths', 'localkey_is_minor'])['duration_qb'].sum()
# Order the rows by the combined (major + minor) duration of each
# localkey_fifths value, longest first.
localkey_fifths_durations = localkey_fifths_durations.sort_values(key=_stacked_bar_length, ascending=False)
bar_data = replace_boolean_mode_by_strings(localkey_fifths_durations.reset_index())
# Replace the numeric fifths values by the output of ms3.fifths2iv.
bar_data['localkey_fifths'] = bar_data['localkey_fifths'].map(ms3.fifths2iv)
fig = px.bar(
    bar_data,
    x='localkey_fifths',
    y='duration_qb',
    color='localkey_mode',
    log_y=True,
    barmode='group',
    labels={
        'localkey_fifths': 'Roots of local keys as intervallic distance from the global tonic',
        'duration_qb': 'total duration in quarter notes',
        'localkey_mode': 'mode',
    },
    color_discrete_sequence=corpus_color_scale,
    width=1000,
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "key_segments.png"), scale=2)
fig.show()
# Recompute the sums without any re-sorting: the rows stay in the order the
# groupby produces, i.e. ordered by localkey_fifths (then mode).
localkey_fifths_durations = keys.groupby(['localkey_fifths', 'localkey_is_minor'])['duration_qb'].sum()
bar_data = replace_boolean_mode_by_strings(localkey_fifths_durations.reset_index())
# Replace the numeric fifths values by the output of ms3.fifths2iv.
bar_data['localkey_fifths'] = bar_data['localkey_fifths'].map(ms3.fifths2iv)
fig = px.bar(
    bar_data,
    x='localkey_fifths',
    y='duration_qb',
    color='localkey_mode',
    log_y=True,
    barmode='group',
    labels={
        'localkey_fifths': 'Roots of local keys as intervallic distance from the global tonic',
        'duration_qb': 'total duration in quarter notes',
        'localkey_mode': 'mode',
    },
    color_discrete_sequence=corpus_color_scale,
    width=1000,
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "key_segments_line_of_fifths.png"), scale=2)
fig.show()
# Merge both modes per localkey_fifths value (ascending by duration) and show
# the absolute totals next to their share of the overall duration.
localkey_fifths_durations_stacked = localkey_fifths_durations.groupby(level=0).sum().sort_values()
pd.concat(
    [
        localkey_fifths_durations_stacked,
        localkey_fifths_durations_stacked.rename('fraction') / localkey_fifths_durations_stacked.sum(),
    ],
    axis=1,
)
| duration_qb | fraction | |
|---|---|---|
| localkey_fifths | ||
| 10 | 24.0 | 0.000263 |
| -9 | 24.0 | 0.000263 |
| 7 | 41.0 | 0.000449 |
| -11 | 64.5 | 0.000706 |
| 6 | 67.5 | 0.000739 |
| 9 | 70.5 | 0.000772 |
| -7 | 134.0 | 0.001467 |
| -8 | 157.0 | 0.001718 |
| 8 | 161.0 | 0.001762 |
| -6 | 371.0 | 0.00406 |
| 5 | 504.0 | 0.005516 |
| 2 | 566.083333 | 0.006196 |
| -5 | 587.75 | 0.006433 |
| -2 | 1186.5 | 0.012986 |
| 3 | 1453.208333 | 0.015905 |
| 4 | 2347.458333 | 0.025692 |
| -1 | 2593.0 | 0.028379 |
| -4 | 2995.625 | 0.032786 |
| -3 | 4728.25 | 0.051749 |
| 1 | 6084.625 | 0.066594 |
| 0 | 67208.25 | 0.735567 |
# Look up the key segment(s) with localkey_fifths == -9.
keys.loc[keys['localkey_fifths'] == -9]
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | localkey_fifths | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | localkey_slice | |||||||||||||||||||||||||||||||||
| liszt_pelerinage | 161.01_Sposalizio | [362.0, 386.0) | 62 | 62 | 362 | 24.0 | 0 | 0 | 6/4 | 2 | 1 | bbVII.V(+964) | NaN | E | bbVII | NaN | V(+964) | NaN | V | NaN | NaN | +964 | NaN | NaN | NaN | M | False | False | (1, 0, 4) | (3,) | 1 | 1 | NaN | NaN | -9 |
# Look up the key segment(s) with localkey_fifths == 10.
keys.loc[keys['localkey_fifths'] == 10]
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | localkey_fifths | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | localkey_slice | |||||||||||||||||||||||||||||||||
| liszt_pelerinage | 161.04_Sonetto_47_del_Petrarca | [140.0, 164.0) | 28 | 28 | 140 | 24.0 | 0 | 0 | 6/4 | 2 | 1 | #VI.viio43{ | NaN | Db | #VI | NaN | viio43 | NaN | vii | o | 43 | NaN | NaN | NaN | { | o7 | False | False | (-1, -4, 5, 2) | () | 5 | -1 | NaN | NaN | 10 |
Cadences¶
# Number of occurrences of each cadence label across all annotations.
all_annotations['cadence'].value_counts()
PAC 1289
HC 895
IAC 631
PC 61
EC 47
DC 30
Name: cadence, dtype: int64
# The same cadence label counts, broken down by corpus.
all_annotations.groupby("corpus_name")['cadence'].value_counts()
corpus_name cadence
Beethoven Sonatas PAC 603
HC 406
IAC 275
EC 31
DC 15
PC 3
Chopin Mazurkas PAC 197
HC 64
IAC 56
PC 8
EC 3
DC 1
Debussy Suite HC 8
IAC 8
PAC 7
DC 3
PC 2
EC 1
Dvořák Silhouettes IAC 49
PAC 46
HC 32
DC 6
EC 3
PC 3
Grieg Lyric Pieces PAC 190
HC 147
IAC 65
PC 23
DC 4
Liszt Années PAC 89
HC 88
IAC 74
PC 13
EC 8
Medtner Tales PAC 76
HC 52
IAC 29
PC 3
Schumann Kinderszenen HC 32
PAC 30
IAC 14
EC 1
Tchaikovsky Seasons HC 66
IAC 61
PAC 51
PC 6
DC 1
Name: cadence, dtype: int64
# Cadence label counts per corpus, sorted by count across all corpora,
# followed by the total number of cadence labels in each corpus.
cadence_count_per_corpus = (
    all_annotations
    .groupby("corpus_name")['cadence']
    .value_counts()
    .sort_values(ascending=False)
)
cadence_count_per_corpus.groupby(level=0).sum()
corpus_name
Beethoven Sonatas 1333
Chopin Mazurkas 329
Debussy Suite 29
Dvořák Silhouettes 139
Grieg Lyric Pieces 429
Liszt Années 272
Medtner Tales 160
Schumann Kinderszenen 77
Tchaikovsky Seasons 185
Name: cadence, dtype: int64
# Normalise the counts by each corpus' total so that the cadence types of a
# corpus sum to 1 (the division aligns on the shared 'corpus_name' level).
cadence_fraction_per_corpus = cadence_count_per_corpus / cadence_count_per_corpus.groupby(level=0).sum()
fig = px.bar(
    cadence_fraction_per_corpus.rename('count').reset_index(),
    x='corpus_name',
    y='count',
    color='cadence',
    # `labels` is keyed by column name: the x column is 'corpus_name', so that
    # is the key needed to blank the x-axis title ('corpus' matched nothing).
    labels=dict(count='fraction', corpus_name=''),
    height=400,
    width=900,
    category_orders=dict(corpus_name=chronological_corpus_names),
    # color_discrete_map=cadence_colors,  # disabled in the original cell
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "cadences.png"), scale=2)
fig.show()
Harmony labels¶
Unigrams¶
To compute unigram statistics, the tokens need to be grouped according to whether they occur within a major or a minor key, because the mode changes their meaning. To that end, the annotated corpus is sliced into contiguous localkey segments, which are then grouped into a major (localkey_is_minor=False) and a minor (localkey_is_minor=True) group.
# Summed duration per (root, chord type), restricted to chords whose root lies
# between -5 and 6 (inclusive).
root_in_range = all_chords.root.between(-5, 6)
root_durations = all_chords[root_in_range].groupby(['root', 'chord_type'])['duration_qb'].sum()
# Optional: sort by stacked bar length (disabled)
#root_durations = root_durations.sort_values(key=lambda S: S.index.get_level_values(0).map(S.groupby(level=0).sum()), ascending=False)
bar_data = root_durations.reset_index()
# Replace the numeric root values by the output of ms3.fifths2iv.
bar_data['root'] = bar_data['root'].map(ms3.fifths2iv)
px.bar(bar_data, x='root', y='duration_qb', color='chord_type')
# --- chord roots relative to the local or secondary tonic -------------------
relative_roots = all_chords[['numeral', 'duration_qb', 'relativeroot', 'localkey_is_minor', 'chord_type']].copy()
relative_roots['relativeroot_resolved'] = transform(
    relative_roots,
    ms3.resolve_relative_keys,
    ['relativeroot', 'localkey_is_minor'],
)
# Wherever a resolved relative root exists, its letter case overrides the mode
# flag (lowercase -> True) before the numerals are converted to fifths.
has_rel = relative_roots.relativeroot_resolved.notna()
relative_roots.loc[has_rel, 'localkey_is_minor'] = relative_roots.loc[has_rel, 'relativeroot_resolved'].str.islower()
relative_roots['root'] = transform(
    relative_roots,
    roman_numeral2fifths,
    ['numeral', 'localkey_is_minor'],
)

# --- collapse chord types occurring fewer than 500 times into 'other' -------
chord_type_frequency = all_chords.chord_type.value_counts()
rare_chord_types = chord_type_frequency[chord_type_frequency < 500].index
# presumably ms3.map_dict leaves values without a mapping untouched -- confirm
replace_rare = ms3.map_dict({t: 'other' for t in rare_chord_types})
relative_roots['type_reduced'] = relative_roots.chord_type.map(replace_rare)
#is_special = relative_roots.chord_type.isin(('It', 'Ger', 'Fr'))
#relative_roots.loc[is_special, 'root'] = -4

# --- aggregate durations and fix the plotting order --------------------------
root_durations = (
    relative_roots
    .groupby(['root', 'type_reduced'])['duration_qb']
    .sum()
    .sort_values(ascending=False)
)
bar_data = root_durations.reset_index()
bar_data['root'] = bar_data['root'].map(ms3.fifths2iv)
# x-axis categories ordered by their total duration, longest first
root_order = bar_data.groupby('root').duration_qb.sum().sort_values(ascending=False).index.to_list()
type_colors = dict(zip(('Mm7', 'M', 'o7', 'o', 'mm7', 'm', '%7', 'MM7', 'other'), colorlover.scales['9']['qual']['Paired']))

fig = px.bar(
    bar_data,
    x='root',
    y='duration_qb',
    color='type_reduced',
    barmode='group',
    log_y=True,
    color_discrete_map=type_colors,
    category_orders={
        'root': root_order,
        # legend/bar order: chord types by descending number of occurrences
        'type_reduced': relative_roots.type_reduced.value_counts().index.to_list(),
    },
    labels={
        'root': "intervallic difference between chord root to the local or secondary tonic",
        'duration_qb': "duration in quarter notes",
        'type_reduced': "chord type",
    },
    width=1000,
    height=400,
)
fig.update_layout(
    **STD_LAYOUT,
    legend={
        'orientation': 'h',
        'xanchor': "right",
        'x': 1,
        'y': 1,
    },
)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "chord_roots.png"), scale=2)
fig.show()
print(f"Reduced to {len(set(bar_data.iloc[:,:2].itertuples(index=False, name=None)))} types. Paper cites the sum of types in major and types in minor (see below), treating them as distinct.")
Reduced to 123 types. Paper cites the sum of types in major and types in minor (see below), treating them as distinct.
# Share of the total duration falling on roots whose interval name starts with
# "a" or "d" (reported below as augmented / diminished scale degrees).
starts_with_a = bar_data.root.str.startswith("a")
starts_with_d = bar_data.root.str.startswith("d")
dim_or_aug = bar_data[starts_with_a | starts_with_d]['duration_qb'].sum()
complete = bar_data['duration_qb'].sum()
print(f"On diminished or augmented scale degrees: {dim_or_aug} / {complete} = {dim_or_aug / complete}")
On diminished or augmented scale degrees: 371.50000000000017 / 91369.25 = 0.0040659193328171144
# Group the local key slices with dimcat's ModeGrouper.
mode_grouper = dc.ModeGrouper()
mode_slices = mode_grouper.process_data(keys_segmented)
Whole dataset¶
# Display one row per local key slice of the mode-grouped data.
mode_slices.get_slice_info()
| mc | mn | quarterbeats | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | alt_label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | pedalend | ||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| localkey_is_minor | corpus | fname | localkey_slice | ||||||||||||||||||||||||||||||||
| False | beethoven_piano_sonatas | 01-1 | [41.0, 209.0) | 12 | 11 | 41 | 168.0 | 0 | 0 | 2/2 | 2 | 1 | III.IVM2 | ii7(2) | f | III | NaN | IVM2 | NaN | IV | M | 2 | NaN | NaN | NaN | NaN | MM7 | True | False | (4, -1, 3, 0) | () | -1 | 4 | NaN | NaN |
| [284.0, 307.0) | 73 | 71 | 284 | 23.0 | 3/4 | 3/4 | 2/2 | 2 | 1 | III.iio64 | NaN | f | III | NaN | iio64 | NaN | ii | o | 64 | NaN | NaN | NaN | NaN | o | True | False | (-4, 2, -1) | () | 2 | -4 | NaN | NaN | |||
| 01-2 | [0.0, 48.0) | 1 | 0 | 0 | 48.0 | 0 | 1/2 | 3/4 | 2 | 1 | F.V{ | NaN | F | I | NaN | V | NaN | V | NaN | NaN | NaN | NaN | NaN | { | M | False | False | (1, 5, 2) | () | 1 | 1 | NaN | NaN | ||
| [61.0, 91.5) | 22 | 21 | 61 | 30.5 | 0 | 0 | 3/4 | 2 | 1 | V.V7 | NaN | F | V | NaN | V7 | NaN | V | NaN | 7 | NaN | NaN | NaN | NaN | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | NaN | NaN | |||
| [91.5, 183.0) | 32 | 31 | 183/2 | 91.5 | 1/8 | 1/8 | 3/4 | 2 | 1 | I.V7{ | NaN | F | I | NaN | V7 | NaN | V | NaN | 7 | NaN | NaN | NaN | { | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | NaN | NaN | |||
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| True | tchaikovsky_seasons | op37a11 | [155.0, 166.0) | 39 | 39 | 155 | 11.0 | 3/4 | 3/4 | 4/4 | 2 | 1 | iv.V | NaN | E | iv | NaN | V | NaN | V | NaN | NaN | NaN | NaN | NaN | NaN | M | False | True | (1, 5, 2) | () | 1 | 1 | NaN | NaN |
| [171.0, 180.0) | 43 | 43 | 171 | 9.0 | 3/4 | 3/4 | 4/4 | 2 | 1 | v.ii%43 | NaN | E | v | NaN | ii%43 | NaN | ii | % | 43 | NaN | NaN | NaN | NaN | %7 | False | True | (-4, 0, 2, -1) | () | 2 | -4 | NaN | NaN | |||
| [224.0, 232.0) | 57 | 57 | 224 | 8.0 | 0 | 0 | 4/4 | 2 | 1 | iii.V(64) | NaN | E | iii | NaN | V(64) | NaN | V | NaN | NaN | 64 | NaN | NaN | NaN | M | False | True | (1, 0, -3) | () | 1 | 1 | NaN | NaN | |||
| op37a12 | [303.0, 309.0) | 102 | 102 | 303 | 6.0 | 0 | 0 | 3/4 | 2 | 1 | iii/bVI.V(#2) | NaN | Ab | iii/bVI | NaN | V(#2) | NaN | V | NaN | NaN | #2 | NaN | NaN | NaN | M | False | True | (1, 3, 2) | () | 1 | 1 | NaN | NaN | ||
| [399.0, 442.0) | 134 | 134 | 399 | 43.0 | 0 | 0 | 3/4 | 2 | 1 | iii/bVI.V7(#2) | NaN | Ab | iii/bVI | v | V7(#2) | NaN | V | NaN | 7 | #2 | NaN | NaN | NaN | Mm7 | False | True | (1, 3, 2, -1) | () | 1 | 1 | NaN | NaN |
1477 rows × 32 columns
# Compute chord symbol unigrams for the mode-grouped slices and show the
# per-group results.
unigram_analyzer = dc.ChordSymbolUnigrams()
unigrams = unigram_analyzer.process_data(mode_slices)
unigrams.get_group_results()
| I | V7 | V | I6 | IV | V(64) | V43 | V2 | V6 | vi | ii | ii6 | V65 | IV6 | I64 | V7(9) | viio6 | V7/V | ii7 | V7/IV | V7(6) | V64 | ii65 | vi6 | iii | viio | i | vi7 | V65/V | V7(4) | I(9) | viio7/V | V/vi | V(4) | I(4) | viio7 | V2/V | iii6 | viio43 | iv | V2/IV | V7/ii | V43/V | V7/vi | IVM7 | IV64 | V(6) | iv6 | I/V | viio2 | ... | V6(#6) | V6(#6)/ii/III | V6(#96)/vi/III | V6(11) | V6(11)/V | V6(9)/iv | V6(b5)/V | V6/#iv | V7(#2)/bII | V7(#62)/iv | VI(13) | V7(94)/iv | V7(96)/VI | V7(96)/VII | V7(b2)/iv | V7(b3)/#iii | V7(b3)/ii | V7(b5)/bii | V7(b6)/VI | V7(b9)/bII | V7(b9)/ii/III | V7(b9)/iii | V7(b9b6)/III | V7/#vii | V7/III/iv | V7/V/#iii | V7/V/iii | VI(#2) | VI(#9) | VI(11) | V7(94)/bv | V7(9+4)/VII | V7(#94) | V7(#96#4)/VI | V7(#96) | V7(+4)/V | V7(+96)/III | V7(+b3)/V | V7(1394)/V | V7(13b5) | V7(2)/VII | V7(2)/iv | V7(6)/VI | V7(6+4)/VI | V7(64)/VII | V7(64^2)/v | V7(6^2)/III | V7(6b2)/V | V7(6b2)/bII | V7(9)/bv | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| localkey_is_minor | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| False | 5206 | 2579 | 2164 | 1753 | 970 | 707 | 669 | 645 | 636 | 602 | 559 | 545 | 534 | 467 | 448 | 366 | 346 | 329 | 278 | 230 | 200 | 194 | 191 | 184 | 180 | 178 | 175 | 163 | 157 | 146 | 143 | 137 | 129 | 129 | 127 | 122 | 121 | 117 | 117 | 113 | 110 | 109 | 107 | 107 | 104 | 104 | 99 | 95 | 92 | 89 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| True | 235 | 1190 | 1410 | 47 | 84 | 462 | 304 | 216 | 356 | 42 | 45 | 19 | 229 | 38 | 23 | 92 | 11 | 137 | 18 | 20 | 99 | 153 | 13 | 18 | 12 | 7 | 2703 | 1 | 49 | 54 | 8 | 151 | 6 | 79 | 9 | 1 | 33 | 22 | 3 | 480 | 6 | 9 | 20 | 8 | 4 | 14 | 77 | 301 | 26 | 4 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
2 rows × 3125 columns
# Print, for each mode group, the number of types and tokens followed by the
# first 20 entries of its unigram results.
modes = {True: 'MINOR', False: 'MAJOR'}
for group, ugs in unigrams.iter_group_results():
    (is_minor,) = group  # group identifiers are 1-tuples holding the mode flag
    print(f"{modes[is_minor]} UNIGRAMS\n{ugs.shape[0]} types, {ugs.sum()} tokens")
    print(ugs.head(20).to_string())
MAJOR UNIGRAMS
2008 types, 35438 tokens
I 5206
V7 2579
V 2164
I6 1753
IV 970
V(64) 707
V43 669
V2 645
V6 636
vi 602
ii 559
ii6 545
V65 534
IV6 467
I64 448
V7(9) 366
viio6 346
V7/V 329
ii7 278
V7/IV 230
MINOR UNIGRAMS
1717 types, 21317 tokens
i 2703
V 1410
V7 1190
i6 913
iv 480
V(64) 462
V6 356
VI 316
V43 304
iv6 301
i64 237
I 235
V65 229
V2 216
III 212
#viio43 209
bII 164
v 158
V7/III 158
#viio7 154
Per corpus¶
# Group the mode slices by corpus before counting chord symbols, so that the
# resulting table has one row per (localkey_is_minor, corpus) combination.
corpus_wise_unigrams = dc.Pipeline(
    [
        dc.CorpusGrouper(),
        dc.ChordSymbolUnigrams(),
    ]
).process_data(mode_slices)
# Last expression of the cell: displays the combined count table.
corpus_wise_unigrams.get_group_results()
| I | V | I6 | V7 | V6 | V2 | V43 | V(64) | IV | ii6 | V65 | viio6 | IV6 | ii | vi | I64 | viio | i | V65/V | vi6 | viio7/V | iii6 | V7/V | V64 | i6 | ii65 | V7/IV | viio/V | V2/IV | V2/V | V43/V | viio43 | V/vi | V7(4) | V(4) | V6/V | iii | iv | V/V | viio6/V | V65/IV | viio64 | V65/ii | iv6 | V7/ii | #viio7/vi | V43/IV | viio7 | V(b64) | viio2 | ... | V7(6+4)/VI | V64(#7b5) | V6(9)/iv | V6/bv | V64/bIV | V64(+6)/iv | V64(4)/bII | V64(9)/VII | V6(11)/V | V6(11) | V6(#96)/vi/III | V6(#6)/ii/III | V6(#6) | V43/ii/III | V43/bIV | V43/IV/III | V43(b5)/VI | V43(6)/III | V7(6)/VI | V7(+4)/V | V7(#2)/bII | V7(#5)/bII/iv | V7(#96#4)/VI | V7(+96)/III | V7(+b3)/V | V7(13b5) | V7(2)/VII | V7(#11#96)/VI | V65/V/III | V65(b9)/VII | V65(b9)/VI | V65(9)/ii/III | V65(9)/VII | V65(13)/III | V65(+6) | #viio6/V | #viio6(2)/V | V7(13)/III | iv64(#7) | iio(9) | i64(2) | #viio65(2)/V | i64(^2) | V(9#764) | i6(6#4) | iv64(94) | i(+4) | i(97) | V2(6)/bII | i(4)/v | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| localkey_is_minor | corpus | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| False | beethoven_piano_sonatas | 2208 | 1178 | 1122 | 1027 | 466 | 443 | 440 | 437 | 394 | 391 | 335 | 274 | 263 | 235 | 226 | 207 | 139 | 131 | 107 | 93 | 86 | 81 | 77 | 75 | 72 | 71 | 69 | 66 | 61 | 58 | 57 | 57 | 55 | 51 | 49 | 48 | 48 | 47 | 47 | 47 | 46 | 44 | 43 | 43 | 42 | 42 | 42 | 39 | 38 | 38 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| chopin_mazurkas | 954 | 251 | 67 | 656 | 25 | 36 | 18 | 111 | 183 | 66 | 48 | 6 | 48 | 74 | 75 | 49 | 16 | 15 | 24 | 24 | 10 | 3 | 133 | 27 | 3 | 30 | 63 | 9 | 1 | 30 | 6 | 11 | 22 | 10 | 23 | 4 | 46 | 10 | 4 | 6 | 5 | 5 | 3 | 16 | 42 | 11 | 3 | 39 | 4 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| debussy_suite_bergamasque | 24 | 4 | 9 | 21 | 4 | 6 | 3 | 0 | 10 | 3 | 5 | 6 | 12 | 10 | 4 | 3 | 0 | 0 | 0 | 2 | 0 | 7 | 1 | 4 | 0 | 9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 19 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 5 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| dvorak_silhouettes | 254 | 79 | 103 | 89 | 8 | 32 | 12 | 9 | 46 | 5 | 9 | 29 | 7 | 15 | 20 | 2 | 10 | 8 | 7 | 8 | 0 | 2 | 7 | 1 | 2 | 3 | 6 | 0 | 2 | 3 | 5 | 6 | 4 | 3 | 2 | 0 | 4 | 5 | 4 | 0 | 1 | 0 | 0 | 2 | 3 | 2 | 0 | 4 | 0 | 4 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| grieg_lyric_pieces | 566 | 224 | 116 | 264 | 25 | 39 | 78 | 26 | 127 | 14 | 31 | 10 | 45 | 53 | 116 | 68 | 6 | 8 | 2 | 13 | 1 | 5 | 57 | 41 | 0 | 31 | 20 | 0 | 16 | 17 | 24 | 5 | 21 | 14 | 20 | 0 | 33 | 12 | 9 | 1 | 0 | 1 | 0 | 9 | 8 | 2 | 11 | 7 | 0 | 3 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| liszt_pelerinage | 501 | 155 | 109 | 250 | 30 | 23 | 19 | 46 | 108 | 28 | 35 | 1 | 38 | 70 | 85 | 43 | 1 | 8 | 2 | 12 | 23 | 3 | 21 | 9 | 3 | 6 | 35 | 2 | 9 | 7 | 8 | 15 | 6 | 36 | 11 | 2 | 5 | 21 | 3 | 6 | 0 | 1 | 2 | 18 | 6 | 8 | 2 | 30 | 4 | 13 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| medtner_tales | 263 | 139 | 83 | 122 | 24 | 21 | 15 | 11 | 61 | 19 | 12 | 14 | 27 | 52 | 44 | 37 | 4 | 5 | 7 | 23 | 3 | 11 | 26 | 13 | 6 | 24 | 23 | 2 | 10 | 3 | 1 | 2 | 18 | 17 | 7 | 0 | 12 | 11 | 8 | 4 | 0 | 0 | 4 | 6 | 4 | 2 | 3 | 0 | 0 | 6 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| schumann_kinderszenen | 106 | 65 | 69 | 50 | 5 | 10 | 30 | 22 | 23 | 10 | 19 | 4 | 10 | 13 | 9 | 7 | 2 | 0 | 0 | 4 | 8 | 0 | 7 | 10 | 0 | 8 | 9 | 0 | 2 | 0 | 2 | 0 | 3 | 3 | 2 | 0 | 7 | 1 | 4 | 0 | 2 | 0 | 1 | 1 | 2 | 1 | 2 | 2 | 0 | 2 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| tchaikovsky_seasons | 330 | 69 | 75 | 100 | 49 | 35 | 54 | 45 | 18 | 9 | 40 | 2 | 17 | 37 | 23 | 32 | 0 | 0 | 8 | 5 | 6 | 5 | 0 | 14 | 1 | 9 | 4 | 5 | 9 | 3 | 4 | 21 | 0 | 10 | 15 | 3 | 6 | 6 | 3 | 1 | 0 | 0 | 0 | 0 | 2 | 5 | 6 | 1 | 0 | 23 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| True | beethoven_piano_sonatas | 121 | 584 | 33 | 389 | 192 | 91 | 151 | 260 | 22 | 12 | 118 | 7 | 14 | 24 | 24 | 4 | 6 | 857 | 31 | 13 | 86 | 12 | 8 | 46 | 350 | 3 | 4 | 24 | 4 | 8 | 9 | 2 | 4 | 10 | 15 | 11 | 8 | 201 | 11 | 10 | 3 | 0 | 0 | 120 | 2 | 2 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| chopin_mazurkas | 46 | 249 | 3 | 339 | 32 | 26 | 44 | 49 | 6 | 1 | 25 | 0 | 3 | 1 | 0 | 1 | 0 | 511 | 10 | 3 | 20 | 2 | 77 | 29 | 60 | 5 | 12 | 0 | 0 | 9 | 1 | 0 | 0 | 9 | 15 | 2 | 1 | 59 | 36 | 2 | 0 | 0 | 0 | 54 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| debussy_suite_bergamasque | 7 | 9 | 2 | 8 | 0 | 4 | 8 | 0 | 15 | 0 | 3 | 0 | 2 | 7 | 3 | 2 | 0 | 57 | 5 | 1 | 0 | 1 | 1 | 0 | 16 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 1 | 0 | 0 | 2 | 15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| dvorak_silhouettes | 5 | 42 | 0 | 50 | 10 | 4 | 1 | 20 | 0 | 0 | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 101 | 0 | 0 | 1 | 0 | 4 | 11 | 34 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 20 | 2 | 0 | 0 | 0 | 0 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| grieg_lyric_pieces | 26 | 211 | 1 | 110 | 16 | 22 | 34 | 6 | 22 | 0 | 2 | 0 | 8 | 5 | 1 | 6 | 0 | 380 | 0 | 0 | 6 | 2 | 22 | 32 | 82 | 0 | 2 | 0 | 0 | 7 | 0 | 0 | 1 | 2 | 23 | 0 | 0 | 58 | 16 | 0 | 0 | 0 | 7 | 38 | 3 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| liszt_pelerinage | 21 | 99 | 5 | 60 | 37 | 8 | 19 | 44 | 10 | 3 | 13 | 0 | 1 | 2 | 10 | 10 | 0 | 159 | 1 | 0 | 11 | 2 | 4 | 13 | 100 | 2 | 2 | 7 | 1 | 4 | 3 | 1 | 0 | 1 | 5 | 2 | 0 | 19 | 0 | 0 | 0 | 0 | 1 | 25 | 3 | 0 | 3 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| medtner_tales | 5 | 131 | 3 | 109 | 38 | 32 | 23 | 36 | 9 | 3 | 9 | 1 | 3 | 6 | 4 | 0 | 1 | 346 | 2 | 1 | 21 | 3 | 19 | 14 | 175 | 2 | 0 | 2 | 0 | 2 | 5 | 0 | 1 | 29 | 15 | 2 | 1 | 77 | 9 | 1 | 0 | 0 | 1 | 50 | 1 | 1 | 0 | 0 | 0 | 2 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| tchaikovsky_seasons | 4 | 72 | 0 | 110 | 21 | 29 | 24 | 36 | 0 | 0 | 53 | 0 | 6 | 0 | 0 | 0 | 0 | 247 | 0 | 0 | 6 | 0 | 1 | 8 | 87 | 0 | 0 | 4 | 0 | 2 | 0 | 0 | 0 | 2 | 6 | 0 | 0 | 22 | 0 | 2 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 2 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 4 | 2 | 2 | 2 | 2 | 1 | 1 | |
| schumann_kinderszenen | 0 | 13 | 0 | 15 | 10 | 0 | 0 | 11 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 45 | 0 | 0 | 0 | 0 | 1 | 0 | 9 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3 | 2 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
18 rows × 3125 columns
# One header line plus the 5 first count entries for every (mode, corpus) group.
# Relies on the `modes` label mapping defined in an earlier cell.
for (is_minor, corpus_name), ugs in corpus_wise_unigrams.iter_group_results():
    header = f"{corpus_name} {modes[is_minor]} unigrams ({ugs.shape[0]} types, {ugs.sum()} tokens)"
    print(header)
    first_entries = ugs.head(5)
    print(first_entries.to_string())
beethoven_piano_sonatas MAJOR unigrams (839 types, 15130 tokens)
I 2208
V 1178
I6 1122
V7 1027
V6 466
chopin_mazurkas MAJOR unigrams (460 types, 5257 tokens)
I 954
V7 656
V 251
IV 183
V7/V 133
debussy_suite_bergamasque MAJOR unigrams (94 types, 335 tokens)
I 24
V7 21
iii 19
V7(+2) 12
IV6 12
dvorak_silhouettes MAJOR unigrams (140 types, 1067 tokens)
I 254
I6 103
V7 89
V 79
IV 46
grieg_lyric_pieces MAJOR unigrams (659 types, 4882 tokens)
I 566
V7 264
V 224
IV 127
I6 116
liszt_pelerinage MAJOR unigrams (552 types, 3455 tokens)
I 501
V7 250
V 155
I6 109
IV 108
medtner_tales MAJOR unigrams (636 types, 2864 tokens)
I 263
V 139
V7 122
I6 83
IV 61
schumann_kinderszenen MAJOR unigrams (114 types, 714 tokens)
I 106
I6 69
V 65
V7 50
V43 30
tchaikovsky_seasons MAJOR unigrams (194 types, 1734 tokens)
I 330
V7 100
I6 75
V 69
V43 54
beethoven_piano_sonatas MINOR unigrams (498 types, 6379 tokens)
i 857
V 584
V7 389
i6 350
V(64) 260
chopin_mazurkas MINOR unigrams (388 types, 3700 tokens)
i 511
V7 339
V 249
III 82
V7/V 77
debussy_suite_bergamasque MINOR unigrams (228 types, 678 tokens)
i 57
III 23
V7/III 19
VI 17
i6 16
dvorak_silhouettes MINOR unigrams (69 types, 459 tokens)
i 101
V7 50
V 42
i6 34
V(64) 20
grieg_lyric_pieces MINOR unigrams (506 types, 3259 tokens)
i 380
V 211
V7 110
i6 82
iv 58
liszt_pelerinage MINOR unigrams (322 types, 1581 tokens)
i 159
i6 100
V 99
V7 60
V(64) 44
medtner_tales MINOR unigrams (827 types, 3806 tokens)
i 346
i6 175
V 131
V7 109
iv 77
schumann_kinderszenen MINOR unigrams (50 types, 197 tokens)
i 45
V7 15
V 13
V(64) 11
V6 10
tchaikovsky_seasons MINOR unigrams (125 types, 1258 tokens)
i 247
V7 110
i6 87
V 72
V65 53
# For each mode, collect the chord types that occur in every single corpus.
types_shared_between_corpora = {}
for (is_minor, corpus_name), ugs in corpus_wise_unigrams.iter_group_results():
    # 1-tuple key, so it can be reused below to look up whole-dataset counts.
    mode_key = (is_minor, )
    shared_so_far = types_shared_between_corpora.get(mode_key)
    if shared_so_far is None:
        # First corpus seen for this mode: start from its full chord vocabulary.
        types_shared_between_corpora[mode_key] = set(ugs.index)
    else:
        # Keep only the chord types that this corpus contains as well.
        types_shared_between_corpora[mode_key] = shared_so_far.intersection(ugs.index)

# Nested {chord: {group key: count}} lookup built from the whole-dataset counts.
sorted_unigrams = unigrams.get_group_results().to_dict()

# Turn every set into a list ordered by descending whole-dataset frequency.
ranked_shared_types = {}
for mode_key, shared_types in types_shared_between_corpora.items():
    ranked_shared_types[mode_key] = sorted(
        shared_types,
        key=lambda chord: sorted_unigrams[chord][mode_key],
        reverse=True,
    )
types_shared_between_corpora = ranked_shared_types

n_types = {mode_key: len(chords) for mode_key, chords in types_shared_between_corpora.items()}
print(f"Chords which occur in all corpora, sorted by descending global frequency:\n{types_shared_between_corpora}\nCounts: {n_types}")
Chords which occur in all corpora, sorted by descending global frequency:
{(False,): ['I', 'V7', 'V', 'I6', 'IV', 'V43', 'V2', 'V6', 'vi', 'ii', 'ii6', 'V65', 'IV6', 'I64', 'viio6', 'ii7', 'V7/IV', 'V64', 'ii65', 'vi6', 'iii', 'V7(4)', 'V/V', 'ii2', 'ii64', '#viio7/vi'], (True,): ['i', 'V', 'V7', 'i6', 'iv', 'VI', 'V65', '#viio43', 'V7/V', 'iv64']}
Counts: {(False,): 26, (True,): 10}
Per piece¶
# Group the mode slices by piece before counting chord symbols, so that the
# resulting table has one row per (localkey_is_minor, fname) combination.
piece_wise_unigrams = dc.Pipeline(
    [
        dc.PieceGrouper(),
        dc.ChordSymbolUnigrams(),
    ]
).process_data(mode_slices)
# Last expression of the cell: displays the combined count table.
piece_wise_unigrams.get_group_results()
| V7 | I6 | I | V(64) | ii6 | V | ii6(2) | V7(+b9) | V65/V | viio7/V | I64 | V7(b9) | viio43 | V43 | V2 | V6 | V43/V | IVM2 | IV(0) | V7(b2) | I(974) | V65 | ii7 | iii | iii(0) | iio64 | viio6 | viio64 | IV | V7(4) | IV6 | iii6 | I(64) | V2(4) | IV(94) | V6/V | I(4) | V(9) | I(94) | I(+4) | I6(2) | ii | ii(9) | I6(6#4#2) | ii65 | I(97b64) | vii%43 | viio | IV6(+2) | vi6 | ... | I(7)/V | #vii%65(b3)/V/V | #vii%65/V/V | I(94)/V | V7(b5)/ii | Fr6/#iii | #VI+6 | ii%2(6) | ii2(6)/III | IIIM43 | Ger2(2) | i(b964) | i64(7)/#vii | viio2/#VI | V+2/bII/iii | bII6/iii | V+2/bII | V64(b5) | i(#11^9) | viio2/#IV | V7/#IV | i64(7)/iv | iv7/IV | i64(7)/vi | V64(#6) | ii%7(4)/v | i6(6)/v | bII7 | Fr2/iv | VII(+4) | #vi%7(b4) | ii%43(+6) | Ger6(4)/V | IV/vii | I(4)/VI | iv(+#7) | V(b5+4)/V | i%65 | v6(4) | IV/#vi | i/#vi | vi6/III | vii%43/V/III | V64(#7) | V/#vi | vi%7/#vi | vi%7/bv | vi%7/iii | ii%7/bV | v6/vii | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| localkey_is_minor | fname | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| False | 01-1 | 11 | 8 | 7 | 5 | 5 | 5 | 3 | 3 | 3 | 3 | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 01-2 | 19 | 17 | 31 | 15 | 9 | 10 | 0 | 0 | 0 | 0 | 1 | 0 | 4 | 0 | 10 | 0 | 0 | 0 | 0 | 0 | 4 | 3 | 0 | 0 | 0 | 0 | 0 | 5 | 5 | 9 | 7 | 5 | 5 | 4 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 01-3 | 8 | 8 | 16 | 3 | 9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 4 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 01-4 | 4 | 2 | 11 | 2 | 4 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 02-1 | 20 | 32 | 76 | 3 | 6 | 51 | 0 | 0 | 2 | 0 | 1 | 0 | 1 | 7 | 13 | 24 | 4 | 0 | 0 | 0 | 0 | 9 | 1 | 0 | 0 | 0 | 4 | 6 | 1 | 0 | 11 | 8 | 0 | 0 | 0 | 2 | 2 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 0 | 8 | 0 | 3 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| True | 161.01_Sposalizio | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| BI145-1op50-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| op42n02 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| op71n04 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 23-2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
411 rows × 3125 columns
# For each mode, intersect the chord vocabularies of all pieces.
types_shared_between_pieces = {}
# NOTE(review): the second key element is named `corpus_name`, but the table
# displayed for piece_wise_unigrams labels that level `fname`, so it is
# presumably the piece name. It is not used in the loop body.
for (is_minor, corpus_name), ugs in piece_wise_unigrams.iter_group_results():
    shared_so_far = types_shared_between_pieces.get(is_minor)
    if shared_so_far is None:
        # First piece seen for this mode: start from its full chord vocabulary.
        types_shared_between_pieces[is_minor] = set(ugs.index)
    else:
        # Keep only the chord types that this piece contains as well.
        types_shared_between_pieces[is_minor] = shared_so_far.intersection(ugs.index)
print(types_shared_between_pieces)
{False: set(), True: set()}
Bigrams¶
Whole dataset¶
# Count chord-symbol transitions for the mode slices without further grouping.
bigram_analyzer = dc.ChordSymbolBigrams()
bigrams = bigram_analyzer.process_data(mode_slices)
# Last expression of the cell: displays the combined count table.
bigrams.get_group_results()
| V7 | V | I | V2 | V(64) | I | I6 | V65 | I | V | V43 | I | IV | V6 | I | V | V(64) | I6 | I | IV | V7 | V7/V | V7(6) | V43 | viio6 | V | ii6 | I | I(9) | I6 | V7(4) | V7(9) | I6 | viio6 | I64 | I | I(4) | I6 | I | V2 | V7/IV | I6 | V7 | viio | V6 | I6 | ... | V7 | V65/iv | V7(#6)/V | V7(#96#4)/VI | V7(#94) | V7(#9) | V7(#62)/iv | V7(#5)/bII/iv | V7(#2)/bII | V7(#11) | V7(#11#96)/VI | V7 | V65/v | V7 | V65/vii | V65/v | V65/iv | V7 | V65/iv | V7 | vo7 | ||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| I | I | V | V7 | I6 | V7 | I6 | I | I | IV | V7 | I | V43 | I | I | V6 | I6 | V | IV | I64 | V2 | vi | I6 | V(64) | V | V7 | I6 | I | V2 | V(64) | V65 | I | viio6 | V7 | V7 | V43 | I6 | I | ii6 | IV6 | I | V2 | viio6 | I | IV | V | V7(9) | I | V | ii6 | ... | VIM7(+2) | VIM7 | VI(11) | V7(6)/bII | ii%65(9) | iio6/V | #viio7/iv | iv7 | V7(#11#96)/VI | V7 | V | I(4) | I | V7(2)/iv | bII/iv | V2/bII | V7 | V7/VI | iv6/IV | V7(4)/VII | V7(#96) | V7(#94) | V/v | #ivo7 | #iii6 | vii(11) | VM2(9)/iv | V7(6)/v | ii%43 | V65(b9)/VI | V7/iv | V7/VI | V6/iv | V2(9)/III | IV | IIIM2/iv | #vi%7 | #vi2 | #viio7(9) | #viio7/#vi | Ger6(b8) | I(#7) | III6(4) | IV7(4) | V(#6) | V(#64) | V(#7) | V(4) | V65(11)/bII | i | |
| localkey_is_minor | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| False | 1244 | 581 | 441 | 402 | 344 | 309 | 294 | 292 | 261 | 244 | 232 | 218 | 210 | 164 | 163 | 159 | 147 | 141 | 137 | 135 | 132 | 125 | 122 | 118 | 117 | 116 | 115 | 113 | 112 | 111 | 111 | 110 | 109 | 107 | 98 | 96 | 94 | 94 | 93 | 92 | 85 | 85 | 81 | 81 | 79 | 78 | 78 | 77 | 76 | 76 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| True | 42 | 35 | 22 | 21 | 6 | 150 | 10 | 1 | 17 | 4 | 141 | 4 | 4 | 2 | 6 | 7 | 2 | 139 | 6 | 2 | 2 | 1 | 1 | 61 | 34 | 65 | 3 | 1 | 38 | 0 | 11 | 8 | 4 | 26 | 20 | 5 | 2 | 2 | 4 | 0 | 8 | 1 | 2 | 5 | 4 | 0 | 16 | 4 | 44 | 0 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
2 rows × 14058 columns
# Human-readable labels for the boolean mode key of each group.
modes = {True: 'MINOR', False: 'MAJOR'}

# Print a short report per mode: number of distinct transition types, total
# number of transition tokens, and the 20 first entries of the count series.
# (`ugs` holds bigram counts here; the name is carried over from the unigram cells.)
for (is_minor,), ugs in bigrams.iter_group_results():
    summary = f"{modes[is_minor]} BIGRAMS\n{ugs.shape[0]} transition types, {ugs.sum()} tokens"
    print(summary)
    top_entries = ugs.head(20)
    print(top_entries.to_string())
MAJOR BIGRAMS
8506 transition types, 34551 tokens
from to
V7 I 1244
V I 581
I V 441
V7 402
V2 I6 344
V(64) V7 309
I I6 294
I6 I 292
V65 I 261
I IV 244
V V7 232
V43 I 218
I V43 210
IV I 164
V6 I 163
I V6 159
V I6 147
V(64) V 141
I6 IV 137
I I64 135
MINOR BIGRAMS
6561 transition types, 20727 tokens
from to
V7 i 577
V i 420
i V 210
V7 196
i6 174
V(64) V7 150
i6 i 147
V V7 141
V(64) V 139
V43 i 133
V65 i 129
V6 i 118
i V6 99
V2 i6 94
i iv 88
VI 78
V43 75
V7(6) V7 65
V7 V(64) 61
V(4) V 58
Per corpus¶
# Group the mode slices by corpus before counting chord-symbol transitions, so
# that the resulting table has one row per (localkey_is_minor, corpus) combination.
corpus_wise_bigrams = dc.Pipeline(
    [
        dc.CorpusGrouper(),
        dc.ChordSymbolBigrams(),
    ]
).process_data(mode_slices)
# Last expression of the cell: displays the combined count table.
corpus_wise_bigrams.get_group_results()
| V7 | V | V2 | I | V(64) | I6 | I | V65 | I | V43 | V | I | V(64) | V6 | I | I6 | V | ii6 | viio6 | I | V | V43 | I6 | viio6 | I | I6 | V7 | V2 | I6 | IV | I | I6 | viio | V6 | I | ii6 | V7(4) | V7 | V65/V | I64 | ii6 | I | IV6 | V7/V | iii6 | V43 | ii6 | ... | i(+4) | iv64(4) | ii%65(4) | iv64 | ii%2 | iv | viio2 | V2/V | V2/iv | V43(+6)/iv | V6 | V65 | V2 | V | V7(6) | VI(4) | V7(6#2) | V7 | i | V7 | #viio43 | #viio2 | ii%7 | i | iv6/v | Ger6 | Fr6/iv | i6 | V7/v | V43(4)/V | i/iv | V6 | i6(#76) | i6 | V65 | V7 | i/iv | V43 | i | V/iv | ii%65 | V2 | ii%65 | V2 | V2(6)/bII | i(4)/v | V2/bII | ii%65(2) | |||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| I | I | I6 | V | V7 | I | I6 | I | V7 | V43 | I | V7 | V6 | V | I | V2 | viio6 | I6 | V(64) | I | V65 | V2 | I6 | IV | I6 | IV | ii6 | V2 | V(64) | I | V43 | I6 | viio6 | ii6 | I | V | I64 | IV6 | I6 | V7 | V | V | I | V7 | vi | iii6 | V | ii6 | V7 | V | ... | i | iv64 | ii%65 | V7(64) | i(9) | V43(+6)/iv | #vi%2 | V(#74) | iv6(#72) | V43/iv | V65(6) | i(9) | i6(#72) | V7(6#2) | V65/iv | i6(9) | VI(6) | V7 | i(+4) | V2(6) | ii%65 | i6(2) | V7(#2) | V7(2) | #viio43(2) | V/v | VI | I(4) | V65(6) | i(4)/v | V6 | v6/iv | Fr6 | i6(9) | Fr6/iv | i(#4) | i(6) | i6(#7) | iv6/v | VI(6) | V7(#9) | ii%65 | ii%7(4) | i6(#4) | V43 | i6(#76) | V2/bII | i/v | ii%65(2) | ii%65 | ||
| localkey_is_minor | corpus | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| False | beethoven_piano_sonatas | 525 | 318 | 238 | 228 | 212 | 212 | 186 | 175 | 167 | 167 | 160 | 138 | 124 | 118 | 115 | 107 | 103 | 100 | 95 | 92 | 91 | 91 | 88 | 81 | 81 | 81 | 75 | 68 | 68 | 68 | 67 | 67 | 67 | 66 | 61 | 59 | 57 | 52 | 50 | 47 | 44 | 43 | 43 | 41 | 41 | 40 | 39 | 39 | 39 | 36 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| chopin_mazurkas | 336 | 51 | 18 | 70 | 48 | 16 | 14 | 21 | 92 | 1 | 10 | 58 | 10 | 8 | 0 | 11 | 0 | 0 | 5 | 2 | 6 | 2 | 1 | 4 | 0 | 96 | 9 | 0 | 34 | 6 | 2 | 8 | 3 | 0 | 12 | 6 | 34 | 13 | 0 | 7 | 10 | 1 | 5 | 26 | 10 | 0 | 56 | 0 | 3 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| debussy_suite_bergamasque | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| dvorak_silhouettes | 64 | 37 | 29 | 17 | 6 | 16 | 21 | 2 | 45 | 5 | 3 | 2 | 3 | 0 | 6 | 8 | 6 | 7 | 0 | 9 | 2 | 0 | 1 | 16 | 5 | 3 | 0 | 14 | 0 | 3 | 0 | 11 | 7 | 0 | 2 | 0 | 0 | 1 | 0 | 3 | 1 | 0 | 0 | 0 | 2 | 0 | 6 | 0 | 3 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| grieg_lyric_pieces | 102 | 37 | 8 | 39 | 10 | 4 | 13 | 6 | 30 | 3 | 16 | 11 | 2 | 0 | 6 | 0 | 0 | 2 | 0 | 3 | 1 | 3 | 3 | 6 | 2 | 18 | 4 | 0 | 9 | 1 | 6 | 20 | 0 | 2 | 0 | 2 | 19 | 8 | 0 | 11 | 3 | 0 | 25 | 2 | 26 | 0 | 6 | 0 | 5 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| liszt_pelerinage | 97 | 47 | 6 | 44 | 10 | 22 | 21 | 14 | 42 | 4 | 4 | 8 | 5 | 4 | 12 | 1 | 0 | 21 | 4 | 0 | 2 | 2 | 4 | 8 | 0 | 23 | 1 | 3 | 4 | 2 | 2 | 5 | 0 | 6 | 0 | 6 | 11 | 5 | 1 | 26 | 11 | 0 | 13 | 1 | 37 | 0 | 0 | 0 | 0 | 3 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| medtner_tales | 50 | 45 | 10 | 27 | 2 | 2 | 12 | 5 | 17 | 2 | 2 | 5 | 7 | 2 | 5 | 0 | 0 | 9 | 0 | 5 | 2 | 10 | 3 | 7 | 2 | 20 | 4 | 0 | 1 | 0 | 2 | 6 | 4 | 0 | 2 | 2 | 6 | 5 | 2 | 2 | 0 | 3 | 4 | 0 | 2 | 0 | 9 | 0 | 3 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| schumann_kinderszenen | 26 | 17 | 10 | 4 | 14 | 12 | 20 | 16 | 5 | 15 | 2 | 4 | 0 | 6 | 1 | 0 | 0 | 2 | 4 | 0 | 0 | 4 | 7 | 8 | 4 | 3 | 0 | 0 | 0 | 0 | 13 | 2 | 0 | 2 | 0 | 0 | 2 | 2 | 0 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 3 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| tchaikovsky_seasons | 38 | 29 | 25 | 12 | 7 | 8 | 7 | 22 | 4 | 13 | 21 | 6 | 8 | 3 | 18 | 5 | 0 | 6 | 3 | 2 | 3 | 0 | 8 | 7 | 0 | 0 | 0 | 0 | 2 | 1 | 4 | 2 | 0 | 0 | 0 | 1 | 6 | 5 | 0 | 6 | 2 | 4 | 4 | 0 | 6 | 0 | 0 | 0 | 2 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| True | beethoven_piano_sonatas | 29 | 24 | 6 | 8 | 92 | 1 | 9 | 12 | 13 | 4 | 2 | 61 | 7 | 109 | 6 | 2 | 4 | 2 | 0 | 1 | 9 | 10 | 3 | 6 | 2 | 3 | 4 | 1 | 34 | 4 | 3 | 1 | 2 | 0 | 4 | 15 | 0 | 0 | 2 | 7 | 29 | 15 | 2 | 3 | 1 | 4 | 5 | 3 | 7 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| chopin_mazurkas | 8 | 1 | 0 | 5 | 22 | 0 | 0 | 4 | 6 | 0 | 0 | 47 | 0 | 13 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 11 | 0 | 0 | 0 | 1 | 0 | 0 | 14 | 0 | 2 | 0 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 9 | 4 | 0 | 0 | 0 | 0 | 0 | 16 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| debussy_suite_bergamasque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| liszt_pelerinage | 4 | 8 | 0 | 9 | 9 | 0 | 1 | 0 | 2 | 0 | 2 | 4 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| dvorak_silhouettes | 0 | 2 | 0 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| grieg_lyric_pieces | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| medtner_tales | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| schumann_kinderszenen | 0 | 0 | 0 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| tchaikovsky_seasons | 0 | 0 | 0 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | ... | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | |
18 rows × 14058 columns
# One header line plus the 5 first count entries for every (mode, corpus) group.
# Relies on the `modes` label mapping defined in an earlier cell.
for (is_minor, corpus_name), ugs in corpus_wise_bigrams.iter_group_results():
    header = f"{corpus_name} {modes[is_minor]} bigrams ({ugs.shape[0]} transition types, {ugs.sum()} tokens)"
    print(header)
    first_entries = ugs.head(5)
    print(first_entries.to_string())
beethoven_piano_sonatas MAJOR bigrams (3481 transition types, 14884 tokens)
from to
V7 I 525
V I 318
V2 I6 238
I V 228
V(64) V7 212
chopin_mazurkas MAJOR bigrams (1393 transition types, 5136 tokens)
from to
V7 I 336
I IV 96
V7 92
V 70
IV I 67
debussy_suite_bergamasque MAJOR bigrams (221 transition types, 327 tokens)
from to
I iii 8
V7 I 6
iii bIII6 5
iii6 V7 5
vii%65 iii6 4
dvorak_silhouettes MAJOR bigrams (347 transition types, 1013 tokens)
from to
V7 I 64
I V7 45
V I 37
V2 I6 29
I I6 21
grieg_lyric_pieces MAJOR bigrams (1735 transition types, 4755 tokens)
from to
V7 I 102
I V 39
V I 37
V7(9) I 31
I V7 30
liszt_pelerinage MAJOR bigrams (1471 transition types, 3323 tokens)
from to
V7 I 97
V I 47
I V 44
V7 42
I(9) I 41
medtner_tales MAJOR bigrams (1593 transition types, 2818 tokens)
from to
V7 I 50
V I 45
I V 27
IV 20
V7 17
schumann_kinderszenen MAJOR bigrams (256 transition types, 676 tokens)
from to
V7 I 26
I I6 20
V I 17
V65 I 16
I V43 15
tchaikovsky_seasons MAJOR bigrams (532 transition types, 1619 tokens)
from to
V7 I 38
V I 29
V2 I6 25
I(6#4) I 22
V65 I 22
beethoven_piano_sonatas MINOR bigrams (1785 transition types, 6227 tokens)
from to
V i 200
V7 i 160
V(64) V 109
i V 94
V(64) V7 92
chopin_mazurkas MINOR bigrams (1129 transition types, 3625 tokens)
from to
V7 i 173
i V7 57
V V7 47
i 41
bII V43/bII 32
debussy_suite_bergamasque MINOR bigrams (477 transition types, 675 tokens)
from to
i(2) i 8
i i(2) 8
V7/III III 6
III V7/III 6
i43 V7/VII 5
dvorak_silhouettes MINOR bigrams (170 transition types, 427 tokens)
from to
V7 i 35
V i 18
i V7 16
i6 V64 9
iv 8
grieg_lyric_pieces MINOR bigrams (1218 transition types, 3178 tokens)
from to
V i 65
V7 i 59
i V 31
V7 27
V(4) V 20
liszt_pelerinage MINOR bigrams (812 transition types, 1515 tokens)
from to
V i 36
i V 27
V7 i 21
V6 i 19
i i6 19
medtner_tales MINOR bigrams (2249 transition types, 3731 tokens)
from to
V7 i 58
i i6 41
i6 i 35
V i 26
i ii%7 20
schumann_kinderszenen MINOR bigrams (76 transition types, 175 tokens)
from to
V7 i 11
i V6 10
V i 9
i iv 8
V(64) V7 7
tchaikovsky_seasons MINOR bigrams (310 transition types, 1174 tokens)
from to
V7 i 58
V65 i 38
V i 24
i V7 23
VI 22
# Relative unigram frequency (percent, rounded to one decimal) for every group
# yielded by corpus_wise_unigrams; each Series is renamed to "frequency".
normalized_corpus_unigrams = {
    group: (100 * ugs / ugs.sum()).round(1).rename("frequency")
    for group, ugs in corpus_wise_unigrams.iter_group_results()
}

# One dict per value of is_minor, each mapping corpus name -> summary table.
transitions_from_shared_types = {False: {}, True: {}}

for (is_minor, corpus_name), bgs in corpus_wise_bigrams.iter_group_results():
    # Labels to report on for this mode (looked up by the 1-tuple key).
    shared_types_for_mode = types_shared_between_corpora[(is_minor,)]

    # Express every bigram count as a percentage of all transitions that
    # leave the same "from" label.
    transitions_normalized_per_from = bgs.groupby(level="from", group_keys=False).apply(
        lambda counts: (100 * counts / counts.sum()).round(1)
    )

    # Keep the first listed transition for each "from" label.
    # NOTE(review): this is the most frequent transition only if `bgs` is
    # ordered by descending count within each label -- confirm.
    fractions_by_from = transitions_normalized_per_from.rename('fraction').reset_index(level=1)
    most_frequent_transition_per_from = fractions_by_from.groupby(level=0).nth(0)

    # Restrict both pieces of information to the shared labels and put them
    # side by side.
    most_frequent_transition_per_shared = most_frequent_transition_per_from.loc[shared_types_for_mode]
    unigram_frequency_of_shared = normalized_corpus_unigrams[(is_minor, corpus_name)].loc[shared_types_for_mode]
    combined = pd.concat(
        [unigram_frequency_of_shared, most_frequent_transition_per_shared],
        axis=1,
    )
    transitions_from_shared_types[is_minor][corpus_name] = combined
For the labels that the major segments of all corpora share, show:
- the unigram frequency of the given label in that corpus,
- the chord that the label transitions to most frequently, and
- the percentage that this transition makes up of all transitions from the label in question.
# Side-by-side table for the major-mode (is_minor == False) summaries:
# one column group per corpus, labelled with the corpus name.
pd.concat(
    list(transitions_from_shared_types[False].values()),
    keys=list(transitions_from_shared_types[False].keys()),
    axis=1,
)
| beethoven_piano_sonatas | chopin_mazurkas | debussy_suite_bergamasque | dvorak_silhouettes | grieg_lyric_pieces | liszt_pelerinage | medtner_tales | schumann_kinderszenen | tchaikovsky_seasons | |||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | |
| I | 14.6 | V | 11.0 | 18.1 | IV | 11.0 | 7.2 | iii | 38.1 | 23.8 | V7 | 19.7 | 11.6 | V | 8.1 | 14.5 | V | 9.8 | 9.2 | V | 10.6 | 14.8 | I6 | 23.0 | 19.0 | I(6#4) | 8.6 |
| V7 | 6.8 | I | 51.8 | 12.5 | I | 51.8 | 6.3 | I | 30.0 | 8.3 | I | 72.7 | 5.4 | I | 39.7 | 7.2 | I | 40.1 | 4.3 | I | 41.0 | 7.0 | I | 55.3 | 5.8 | I | 38.8 |
| V | 7.8 | I | 27.3 | 4.8 | V7 | 23.2 | 1.2 | IV | 75.0 | 7.4 | I | 50.7 | 4.6 | I | 16.7 | 4.5 | I | 30.7 | 4.9 | I | 33.1 | 9.1 | I | 29.3 | 4.0 | I | 43.3 |
| I6 | 7.4 | I | 19.1 | 1.3 | I | 23.9 | 2.7 | vi64 | 11.1 | 9.7 | I | 16.8 | 2.4 | vi | 11.3 | 3.2 | I | 21.8 | 2.9 | ii | 16.9 | 9.7 | V | 21.7 | 4.3 | I | 12.5 |
| IV | 2.6 | I6 | 17.1 | 3.5 | I | 36.6 | 3.0 | iii | 40.0 | 4.3 | I6 | 24.4 | 2.6 | I | 17.3 | 3.1 | I | 34.6 | 2.1 | I6 | 10.0 | 3.2 | ii7 | 18.2 | 1.0 | IV6 | 40.0 |
| V43 | 2.9 | I | 36.4 | 0.3 | I | 55.6 | 0.9 | V7 | 33.3 | 1.1 | I | 25.0 | 1.6 | I | 20.8 | 0.5 | I | 21.1 | 0.5 | V7 | 25.0 | 4.2 | V2(9) | 26.7 | 3.1 | I | 39.6 |
| V2 | 2.9 | I6 | 54.2 | 0.7 | I6 | 50.0 | 1.8 | iii | 66.7 | 3.0 | I6 | 90.6 | 0.8 | vi43 | 25.6 | 0.7 | I6 | 27.3 | 0.7 | I6 | 50.0 | 1.4 | I6 | 100.0 | 2.0 | I6 | 75.8 |
| V6 | 3.1 | I | 24.8 | 0.5 | IV6 | 28.0 | 1.2 | IV6 | 50.0 | 0.7 | I | 75.0 | 0.5 | I | 24.0 | 0.9 | I | 40.0 | 0.8 | I | 20.8 | 0.7 | V7 | 80.0 | 2.8 | I | 40.0 |
| vi | 1.5 | ii6 | 10.3 | 1.4 | IV6(11) | 8.2 | 1.2 | I6 | 25.0 | 1.9 | V65/V | 23.5 | 2.4 | IV | 23.3 | 2.5 | vi(9) | 16.7 | 1.5 | #viio6/vi | 7.0 | 1.3 | ii6 | 37.5 | 1.3 | bVI+ | 17.4 |
| ii | 1.6 | V7 | 9.8 | 1.4 | V7 | 18.9 | 3.0 | I+ | 20.0 | 1.4 | viio6 | 20.0 | 1.1 | V64 | 15.1 | 2.0 | I | 18.6 | 1.8 | ii7(9) | 14.0 | 1.8 | viio6 | 30.8 | 2.1 | V7 | 17.6 |
| ii6 | 2.6 | V(64) | 24.4 | 1.3 | V7 | 39.4 | 0.9 | viio6 | 66.7 | 0.5 | I | 50.0 | 0.3 | V6 | 14.3 | 0.8 | V(64) | 14.3 | 0.7 | ii7(9) | 15.8 | 1.4 | V(64) | 40.0 | 0.5 | V(64) | 33.3 |
| V65 | 2.2 | I | 52.9 | 0.9 | I | 43.8 | 1.5 | iii64 | 40.0 | 0.8 | V7 | 55.6 | 0.6 | I | 19.4 | 1.0 | I | 42.4 | 0.4 | I | 41.7 | 2.7 | I | 84.2 | 2.3 | I | 56.4 |
| IV6 | 1.7 | iii6 | 15.2 | 0.9 | I | 18.8 | 3.6 | vi7 | 16.7 | 0.7 | viio65/V | 28.6 | 0.9 | V7 | 20.0 | 1.1 | bV6 | 10.8 | 0.9 | vi2 | 14.8 | 1.4 | viio2 | 20.0 | 1.0 | I | 53.3 |
| I64 | 1.4 | I | 21.0 | 0.9 | viio7(4) | 25.0 | 0.9 | IV(+6) | 33.3 | 0.2 | I6 | 100.0 | 1.4 | I | 37.3 | 1.2 | I | 31.7 | 1.3 | I | 10.8 | 1.0 | V(4) | 28.6 | 1.8 | V7 | 25.8 |
| viio6 | 1.8 | I | 33.9 | 0.1 | I | 50.0 | 1.8 | IV64 | 33.3 | 2.7 | V | 31.0 | 0.2 | I | 30.0 | 0.0 | iv6(b9) | 100.0 | 0.5 | I | 35.7 | 0.6 | I6 | 100.0 | 0.1 | I | 100.0 |
| ii7 | 0.2 | I6 | 41.7 | 0.6 | V7 | 35.5 | 2.7 | viio6 | 37.5 | 0.7 | I | 37.5 | 2.0 | V7 | 15.1 | 1.0 | V7 | 22.6 | 1.7 | V7 | 20.8 | 1.4 | V | 30.0 | 0.3 | V7(4) | 80.0 |
| V7/IV | 0.5 | IV | 37.7 | 1.2 | IV | 45.9 | 0.3 | ii65(2) | 100.0 | 0.6 | IV | 40.0 | 0.4 | IV | 40.0 | 1.0 | IV | 23.5 | 0.8 | ii%7/IV | 13.0 | 1.3 | IV | 66.7 | 0.2 | viio43 | 100.0 |
| V64 | 0.5 | I | 37.5 | 0.5 | v | 15.4 | 1.2 | I(72) | 100.0 | 0.1 | I | 100.0 | 0.8 | V | 61.0 | 0.3 | I | 66.7 | 0.5 | V | 41.7 | 1.4 | V64(6) | 40.0 | 0.8 | I | 57.1 |
| ii65 | 0.5 | V65/V | 18.6 | 0.6 | I | 36.7 | 2.7 | ii43 | 22.2 | 0.3 | I | 100.0 | 0.6 | ii7 | 45.2 | 0.2 | ii7(9) | 33.3 | 0.8 | ii65(4) | 16.7 | 1.1 | V | 25.0 | 0.5 | I+6 | 88.9 |
| vi6 | 0.6 | V6 | 29.3 | 0.5 | ii7 | 20.8 | 0.6 | IV6 | 50.0 | 0.7 | I | 62.5 | 0.3 | V6 | 15.4 | 0.3 | ii(11) | 27.3 | 0.8 | iii2 | 13.0 | 0.6 | V/V | 100.0 | 0.3 | iii | 40.0 |
| iii | 0.3 | V43 | 17.8 | 0.9 | #viio(b3)/iii | 11.4 | 5.7 | bIII6 | 27.8 | 0.4 | IV | 50.0 | 0.7 | iii7 | 23.3 | 0.1 | vi | 60.0 | 0.4 | V6 | 16.7 | 1.0 | vi6 | 57.1 | 0.3 | iii6 | 33.3 |
| V7(4) | 0.3 | V7 | 92.2 | 0.2 | V7 | 70.0 | 0.6 | V7 | 100.0 | 0.3 | V7 | 100.0 | 0.3 | V7 | 78.6 | 1.0 | V7 | 72.2 | 0.6 | I | 35.3 | 0.4 | V7 | 100.0 | 0.6 | V7 | 60.0 |
| V/V | 0.3 | V | 31.9 | 0.1 | V | 100.0 | 0.3 | V7 | 100.0 | 0.4 | ii | 100.0 | 0.2 | I/V | 33.3 | 0.1 | I/V | 100.0 | 0.3 | V2 | 50.0 | 0.6 | V | 100.0 | 0.2 | V7(+b9) | 66.7 |
| ii2 | 0.2 | V6 | 20.0 | 0.1 | ii2(9) | 28.6 | 0.9 | vii%7 | 33.3 | 0.2 | I | 100.0 | 0.5 | V65 | 26.9 | 0.1 | IV | 33.3 | 0.1 | ii43 | 66.7 | 0.1 | ii65 | 100.0 | 0.5 | V6 | 22.2 |
| ii64 | 0.2 | V7 | 25.7 | 0.2 | V65 | 50.0 | 1.2 | ii43 | 50.0 | 0.1 | V65 | 100.0 | 0.1 | IM7 | 57.1 | 0.0 | v6 | 100.0 | 0.2 | V6/ii | 33.3 | 0.6 | V65 | 100.0 | 0.4 | viio2 | 40.0 |
| #viio7/vi | 0.3 | vi | 66.7 | 0.2 | vi | 27.3 | 0.3 | IV6 | 100.0 | 0.2 | vi | 100.0 | 0.0 | vi | 100.0 | 0.2 | IV64 | 37.5 | 0.1 | I/IV | 50.0 | 0.1 | vi | 100.0 | 0.3 | vi | 80.0 |
For the labels that the minor segments of all corpora share, show:
- the unigram frequency of the given label in that corpus,
- the chord that the label transitions to most frequently, and
- the percentage that this transition makes up of all transitions from the label in question.
# Side-by-side table for the minor-mode (is_minor == True) summaries:
# one column group per corpus, labelled with the corpus name.
# Values and keys are both taken from the True dict, so every column group is
# labelled with the corpus it was computed from even if the major and minor
# dicts ever differ in content or order.
pd.concat(
    transitions_from_shared_types[True].values(),
    keys=transitions_from_shared_types[True].keys(),
    axis=1,
)
| beethoven_piano_sonatas | chopin_mazurkas | debussy_suite_bergamasque | dvorak_silhouettes | grieg_lyric_pieces | liszt_pelerinage | medtner_tales | schumann_kinderszenen | tchaikovsky_seasons | |||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | frequency | to | fraction | |
| i | 13.4 | V | 12.1 | 13.8 | V7 | 12.3 | 8.4 | i(2) | 14.8 | 22.0 | V7 | 20.3 | 11.7 | V | 9.5 | 10.1 | V | 19.3 | 9.1 | i6 | 12.8 | 22.8 | V6 | 31.2 | 19.6 | V7 | 12.0 |
| V | 9.2 | i | 34.6 | 6.7 | V7 | 19.2 | 1.3 | V7 | 22.2 | 9.2 | i | 46.2 | 6.5 | i | 31.6 | 6.3 | i | 40.0 | 3.4 | i | 20.6 | 6.6 | i | 75.0 | 5.7 | i | 38.1 |
| V7 | 6.1 | i | 41.6 | 9.2 | i | 52.0 | 1.2 | i | 25.0 | 10.9 | i | 72.9 | 3.4 | i | 53.6 | 3.8 | i | 35.6 | 2.9 | i | 53.7 | 7.6 | i | 73.3 | 8.7 | i | 54.2 |
| i6 | 5.5 | i | 18.8 | 1.6 | i | 35.0 | 2.4 | iv | 12.5 | 7.4 | V64 | 26.5 | 2.5 | ii%7 | 8.8 | 6.3 | V6 | 10.6 | 4.6 | i | 20.1 | 4.6 | #viio65 | 33.3 | 6.9 | i | 12.9 |
| iv | 3.2 | i6 | 12.6 | 1.6 | i | 28.8 | 2.2 | i | 13.3 | 4.4 | i | 40.0 | 1.8 | V | 10.7 | 1.2 | #viio65 | 15.8 | 2.0 | i6 | 9.2 | 4.6 | V(64) | 50.0 | 1.7 | iio | 20.0 |
| VI | 1.5 | V | 18.3 | 0.8 | III | 20.7 | 2.5 | VI64 | 17.6 | 2.2 | VI6 | 30.0 | 1.4 | i | 28.9 | 0.9 | i | 28.6 | 1.9 | VIM7 | 10.0 | 0.5 | V/III | 100.0 | 2.5 | iv65 | 37.5 |
| V65 | 1.8 | i | 53.4 | 0.7 | i | 56.0 | 0.4 | V43(2) | 33.3 | 0.7 | i | 100.0 | 0.1 | I | 50.0 | 0.8 | i | 46.2 | 0.2 | i6 | 33.3 | 1.5 | i | 100.0 | 4.2 | i | 71.7 |
| #viio43 | 1.8 | i6 | 31.3 | 0.3 | i | 44.4 | 0.1 | iv | 100.0 | 0.2 | #viio65 | 100.0 | 0.6 | #viio65 | 25.0 | 1.5 | i6(#7) | 18.2 | 0.4 | I6 | 18.8 | 4.1 | i6 | 62.5 | 1.0 | i | 54.5 |
| V7/V | 0.1 | V | 71.4 | 2.1 | V7 | 28.6 | 0.1 | ii%43/V | 100.0 | 0.9 | V | 100.0 | 0.7 | V7 | 33.3 | 0.3 | V6 | 100.0 | 0.5 | ii%43(11) | 11.1 | 0.5 | V | 100.0 | 0.1 | V(4) | 100.0 |
| iv64 | 0.4 | #viio7 | 45.8 | 1.3 | i | 31.9 | 0.9 | vii%2/III | 33.3 | 0.2 | iv | 100.0 | 0.6 | i | 26.3 | 0.4 | #viio43/iv | 33.3 | 0.7 | bII6/iv | 10.7 | 0.5 | #viio7 | 100.0 | 0.4 | i | 60.0 |
Per piece¶
# Run a PieceGrouper followed by ChordSymbolBigrams over mode_slices, then
# display the results per group.
piece_wise_bigram_pipeline = dc.Pipeline(
    [
        dc.PieceGrouper(),
        dc.ChordSymbolBigrams(),
    ]
)
piece_wise_bigrams = piece_wise_bigram_pipeline.process_data(mode_slices)
piece_wise_bigrams.get_group_results()
WARNING:dimcat.data.base:ChordSymbolBigrams yielded no result for group (True, '23-2')
| V(64) | viio7/V | ii6(2) | V65/V | V7(+b9) | V7 | ii6 | V7 | I | V | V6 | V43/V | V2 | V7 | I6 | V7(b9) | I6 | ii6 | viio43 | I64 | I6 | iio64 | viio6 | ii7 | V7 | iii(0) | viio64 | V7(b2) | I | V7 | IV | I | I(974) | I6 | I64 | IV(0) | V43 | IVM2 | V | V(64) | I | V43 | V7(4) | V(64) | I | ... | i/ii | IV/v | IV/ii | V/v | V/ii | V2/V | iv64 | #vi | V/iii | VII | bII | V/bv | v | vi%7 | vi%7/#vi | vi%7/bv | V/iii | V/bv | #viio2 | V/#vi | #viio2 | #viio43 | #viio65 | #viio7 | III | IV | V | V(+4) | vi%7/iii | #VII6 | V/V | ii%7/bV | bII64 | Ger2/iv | I64 | v6 | i(64) | V(6) | V7(b5) | V(#7) | i | v6/vii | V(964) | ii%65 | ii%7 | ii%65 | |||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| V7 | V(64) | ii6 | V | V7 | I64 | V65/V | I | viio7/V | I6 | I | V | I6 | V7(b9) | ii6 | V7 | V6 | V(64) | I6 | V7(+b9) | ii6(2) | V7 | iii(0) | V43 | V7(b2) | V2 | iii | viio6 | V7 | V43 | I(974) | viio64 | V65 | ii6(2) | I | IV(0) | viio43 | I6 | IV | I6 | ii7 | V43/V | V7(+b9) | viio43 | V2 | V43/V | I | V7 | V | IV6 | ... | IV/ii | i/v | i/ii | i/ii | i/#vi | iv64 | V(+4) | VI | vi%7/iii | #vi | VI | vi%7/bv | #viio43 | #viio2 | #viio43 | #viio65 | #viio43 | #viio2 | #viio2 | vi%7/#vi | V/#vi | V/iii | V/bv | V/#vi | V/iii | #viio7 | V/bv | VII | bII | III | vi%7 | III | #viio7 | ii%7/bV | bII64 | Ger2/iv | I64 | V7(b5) | #VII6 | iv6(4) | I(6) | V(#7) | V(94) | V7(b5) | v6/vii | iv6 | ii%65 | VIM43 | V(964) | #viio2 | ||
| localkey_is_minor | fname | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| False | 01-1 | 4 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 01-2 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 0 | 0 | 0 | 6 | 0 | 4 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 6 | 6 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 01-3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 01-4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 02-1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | 0 | 5 | 3 | 0 | 9 | 0 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 5 | 0 | 1 | 4 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| True | op38n01 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| op71n04 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| 161.01_Sposalizio | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| op12n01 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
| op71n07 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 1 | |
410 rows × 14058 columns